From 8c826372c4dd9538c5cd6b3ed6fbc4a367f7b837 Mon Sep 17 00:00:00 2001 From: Flammie Pirinen Date: Mon, 11 Apr 2022 17:37:24 +0200 Subject: [PATCH 01/21] copypasted analyse from suggest... --- divvunspell/src/speller/mod.rs | 48 +++++++++++++++++++++++++++++++ divvunspell/src/speller/worker.rs | 44 ++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 97b7d8c..1110b43 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -60,6 +60,14 @@ pub trait Speller { fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; } +pub trait Analyser { + fn analyse(self: Arc, word: &str) -> Vec; + fn analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; + fn suggest_with_analyse(self: Arc, word: &str) -> Vec; + fn suggest_with_analyse_with_config(self: Arc, word: &str, config: + &SpellerConfig) -> Vec; +} + impl Speller for HfstSpeller where F: crate::vfs::File + Send, @@ -125,6 +133,46 @@ where } } +impl Analyser for HfstSpeller +where + F: crate::vfs::File + Send, + T: Transducer + Send, + U: Transducer + Send, +{ + #[allow(clippy::wrong_self_convention)] + fn analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { + if word.len() == 0 { + return vec![]; + } + + let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + + worker.analyse() + + //vec![] + } + + #[inline] + fn analyse(self: Arc, word: &str) -> Vec { + self.analyse_with_config(word, &SpellerConfig::default()) + } + + #[inline] + fn suggest_with_analyse(self: Arc, word: &str) -> Vec { + self.suggest_with_analyse_with_config(word, &SpellerConfig::default()) + } + + fn suggest_with_analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { + if word.len() == 0 { + return vec![]; + } + let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); + + worker.analyse() + + } +} + #[derive(Debug)] pub struct HfstSpeller where diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index 4ce0d0b..eac9f1e 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -501,6 +501,50 @@ where false } + pub(crate) fn analyse(&self) -> Vec { + log::trace!("Beginning analyse"); + let pool = Pool::with_size_and_max(0, 0); + let mut nodes = speller_start_node(&pool, self.state_size() as usize); + let mut lookups = HashMap::new(); + let mut analyses: Vec = vec![]; + let best_weight = self.config.max_weight.unwrap_or(f32::MAX); + + while let Some(next_node) = nodes.pop() { + let max_weight = self.update_weight_limit(best_weight, &analyses); + + self.lexicon_epsilons(&pool, max_weight, &next_node, &mut nodes); + if next_node.input_state as usize != self.input.len() { + self.consume_input(&pool, max_weight, &next_node, &mut nodes); + continue; + } + if self.speller.lexicon().is_final(next_node.lexicon_state) { + let weight = next_node.weight() + + self + .speller + .lexicon() + .final_weight(next_node.lexicon_state) + .unwrap(); + + let string = self + .speller + .lexicon() + .alphabet() + .string_from_symbols(&next_node.string); + + { + let entry = lookups.entry(string).or_insert(weight); + + if *entry > weight { + *entry = weight; + } + } + + } + analyses = self.generate_sorted_suggestions(&lookups); + } + analyses + } + pub(crate) fn suggest(&self) -> Vec { log::trace!("Beginning suggest"); From 915e543a22d3f1ba0c13da73a41793750feb884d Mon Sep 17 00:00:00 2001 From: Flammie Pirinen Date: Tue, 12 Apr 2022 02:45:50 +0200 Subject: [PATCH 02/21] test cli --- divvunspell-bin/src/main.rs | 28 ++++++++++++++++++++++++++++ divvunspell/src/speller/mod.rs | 20 ++++++++++---------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index c305c47..227985e 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -24,6 +24,7 @@ use divvunspell::{ trait OutputWriter { fn write_correction(&mut self, word: &str, is_correct: bool); fn write_suggestions(&mut self, word: &str, suggestions: &[Suggestion]); + fn write_analyses(&mut self, word: &str, analyses: &[Suggestion]); fn write_predictions(&mut self, predictions: &[String]); fn finish(&mut self); } @@ -51,6 +52,14 @@ impl OutputWriter for StdoutWriter { println!("{}", predictions.join(" ")); } + fn write_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { + println!("Analyses: "); + for sugg in suggestions { + println!("{}\t\t{}", sugg.value, sugg.weight); + } + println!(); + } + fn finish(&mut self) {} } @@ -95,6 +104,11 @@ impl OutputWriter for JsonWriter { self.predict = Some(predictions.to_vec()); } + fn write_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { + let i = self.suggest.len() - 1; + self.suggest[i].suggestions = suggestions.to_vec(); + } + fn finish(&mut self) { println!("{}", serde_json::to_string_pretty(self).unwrap()); } @@ -104,6 +118,7 @@ fn run( speller: Arc, words: Vec, writer: &mut dyn OutputWriter, + analyse: bool, is_suggesting: bool, is_always_suggesting: bool, suggest_cfg: &SpellerConfig, @@ -116,6 +131,15 @@ fn run( let suggestions = speller.clone().suggest_with_config(&word, &suggest_cfg); writer.write_suggestions(&word, &suggestions); } + if analyse { + let input_analyses = speller.clone().analyse_with_config(&word, + &suggest_cfg); + writer.write_analyses(&word, &input_analyses); + let output_analyses = + speller.clone().suggest_with_analyse_with_config(&word, + &suggest_cfg); + writer.write_analyses(&word, &output_analyses); + } } } #[derive(Debug, Options)] @@ -150,6 +174,9 @@ struct SuggestArgs { #[options(short = "S", help = "always show suggestions even if word is correct")] always_suggest: bool, + #[options(short = "a", help = "analyse words and suggestions")] + analyse: bool, + #[options(help = "maximum weight limit for suggestions")] weight: Option, @@ -327,6 +354,7 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { speller, words, &mut *writer, + args.analyse, true, args.always_suggest, &suggest_cfg, diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 1110b43..50170b7 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -58,9 +58,9 @@ pub trait Speller { fn is_correct_with_config(self: Arc, word: &str, config: &SpellerConfig) -> bool; fn suggest(self: Arc, word: &str) -> Vec; fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; -} +//} -pub trait Analyser { +//pub trait Analyser { fn analyse(self: Arc, word: &str) -> Vec; fn analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; fn suggest_with_analyse(self: Arc, word: &str) -> Vec; @@ -131,14 +131,14 @@ where self.suggest_single(word, config) } } -} - -impl Analyser for HfstSpeller -where - F: crate::vfs::File + Send, - T: Transducer + Send, - U: Transducer + Send, -{ +//} + +//impl Analyser for HfstSpeller +//where +// F: crate::vfs::File + Send, +// T: Transducer + Send, +// U: Transducer + Send, +//{ #[allow(clippy::wrong_self_convention)] fn analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { if word.len() == 0 { From 234de2441b27e3977775cf43130b77405860abf1 Mon Sep 17 00:00:00 2001 From: Flammie Pirinen Date: Tue, 12 Apr 2022 11:12:38 +0200 Subject: [PATCH 03/21] analyse through suggestions --- divvunspell/src/speller/mod.rs | 2 +- divvunspell/src/speller/worker.rs | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 50170b7..467f7c5 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -168,7 +168,7 @@ where } let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); - worker.analyse() + worker.suggest() } } diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index eac9f1e..f7c6fdd 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -513,10 +513,7 @@ where let max_weight = self.update_weight_limit(best_weight, &analyses); self.lexicon_epsilons(&pool, max_weight, &next_node, &mut nodes); - if next_node.input_state as usize != self.input.len() { - self.consume_input(&pool, max_weight, &next_node, &mut nodes); - continue; - } + self.lexicon_consume(&pool, max_weight, &next_node, &mut nodes); if self.speller.lexicon().is_final(next_node.lexicon_state) { let weight = next_node.weight() + self From d62007b38d7b5bf037df5505e6f94a682e6dbea9 Mon Sep 17 00:00:00 2001 From: Flammie Pirinen Date: Wed, 1 Jun 2022 17:31:16 +0200 Subject: [PATCH 04/21] tracing --- divvunspell-bin/src/main.rs | 1 + divvunspell/src/speller/mod.rs | 2 ++ divvunspell/src/speller/worker.rs | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 227985e..404ea70 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -438,6 +438,7 @@ fn predict(_args: PredictArgs) -> anyhow::Result<()> { fn main() -> anyhow::Result<()> { pretty_env_logger::init(); + let args = Args::parse_args_default_or_exit(); match args.command { diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 467f7c5..05ef7fd 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -147,6 +147,7 @@ where let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + log::trace!("Beginning analyse with config in mod"); worker.analyse() //vec![] @@ -166,6 +167,7 @@ where if word.len() == 0 { return vec![]; } + log::trace!("Beginning analyse suggest with config in mod"); let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); worker.suggest() diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index f7c6fdd..f7b3326 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -483,6 +483,7 @@ where } pub(crate) fn is_correct(&self) -> bool { + log::trace!("is_correct"); // let max_weight = speller_max_weight(&self.config); let pool = Pool::with_size_and_max(0, 0); let mut nodes = speller_start_node(&pool, self.state_size() as usize); @@ -610,7 +611,7 @@ where .lexicon() .alphabet() .string_from_symbols(&next_node.string); - + log::trace!("suggesting? {}::{}", string, weight); if weight < best_weight { best_weight = weight; } @@ -633,6 +634,7 @@ where &self, corrections: &HashMap, ) -> Vec { + log::trace!("Generating sorted suggestions"); let mut c: Vec = corrections .into_iter() .map(|x| Suggestion::new(x.0.clone(), *x.1)) From 8dcc664b75aac7d3ced6d61907dbcc8dec15e899 Mon Sep 17 00:00:00 2001 From: Flammie Pirinen Date: Thu, 2 Jun 2022 02:23:12 +0200 Subject: [PATCH 05/21] only sort at end? --- divvunspell/src/speller/worker.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index f7b3326..5e2005e 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -623,10 +623,8 @@ where *entry = weight; } } - - suggestions = self.generate_sorted_suggestions(&corrections); } - + suggestions = self.generate_sorted_suggestions(&corrections); suggestions } From 6eb0bd2be726a3dfd68a2579914fbc62fe7c8155 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Fri, 7 Oct 2022 02:24:10 +0200 Subject: [PATCH 06/21] continuation marker for incompletion? --- accuracy/src/main.rs | 1 + divvunspell-bin/src/main.rs | 9 +++++-- divvunspell/src/speller/mod.rs | 34 +++++++++++++++++++++------ divvunspell/src/speller/suggestion.rs | 9 +++++-- divvunspell/src/speller/worker.rs | 21 +++++++++++------ 5 files changed, 56 insertions(+), 18 deletions(-) diff --git a/accuracy/src/main.rs b/accuracy/src/main.rs index 312f89a..079d82a 100644 --- a/accuracy/src/main.rs +++ b/accuracy/src/main.rs @@ -21,6 +21,7 @@ static CFG: SpellerConfig = SpellerConfig { beam: None, case_handling: Some(CaseHandlingConfig::default()), node_pool_size: 128, + completion_marker: None, }; fn load_words( diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 404ea70..1a97214 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -42,7 +42,9 @@ impl OutputWriter for StdoutWriter { fn write_suggestions(&mut self, _word: &str, suggestions: &[Suggestion]) { for sugg in suggestions { - println!("{}\t\t{}", sugg.value, sugg.weight); + + println!("{}\t\t{} (is complete {})", sugg.value, sugg.weight, + sugg.completed); } println!(); } @@ -183,6 +185,9 @@ struct SuggestArgs { #[options(help = "maximum number of results")] nbest: Option, + #[options(help = "Character for incomplete predictions")] + continuation_marker: Option, + #[options( no_short, long = "no-case-handling", @@ -310,7 +315,7 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { if args.disable_case_handling { suggest_cfg.case_handling = None; } - + suggest_cfg.completion_marker = args.continuation_marker; if let Some(v) = args.nbest { if v == 0 { suggest_cfg.n_best = None; diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 05ef7fd..d629493 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -29,6 +29,7 @@ pub struct SpellerConfig { pub beam: Option, pub case_handling: Option, pub node_pool_size: usize, + pub completion_marker: Option, } impl SpellerConfig { @@ -39,6 +40,7 @@ impl SpellerConfig { beam: None, case_handling: Some(CaseHandlingConfig::default()), node_pool_size: 128, + completion_marker: None, } } } @@ -236,6 +238,7 @@ where fn suggest_single(self: Arc, word: &str, config: &SpellerConfig) -> Vec { let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); + log::trace!("suggesting single {}", word); worker.suggest() } @@ -247,6 +250,7 @@ where ) -> Vec { use crate::tokenizer::case_handling::*; + log::trace!("suggesting cases..."); let CaseHandler { original_input, mutation, @@ -256,6 +260,7 @@ where let mut best: HashMap = HashMap::new(); for word in std::iter::once(&original_input).chain(words.iter()) { + log::trace!("suggesting for word {}", word); let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); let mut suggestions = worker.suggest(); @@ -275,7 +280,9 @@ where match mode { CaseMode::MergeAll => { + log::trace!("Case merge all"); for sugg in suggestions.into_iter() { + log::trace!("for {}", sugg.value); let penalty_start = if !sugg.value().starts_with(word.chars().next().unwrap()) { case_handling.start_penalty @@ -316,14 +323,27 @@ where if best.is_empty() { return vec![]; } - - let mut out = best - .into_iter() - .map(|(k, v)| Suggestion { - value: k, - weight: v, - }) + let mut out: Vec; + if let Some(s) = &config.completion_marker { + out = best + .into_iter() + .map(|(k, v)| Suggestion { + value: k.clone(), + weight: v, + completed: !k.ends_with(s), + }) .collect::>(); + } + else { + out = best + .into_iter() + .map(|(k, v)| Suggestion { + value: k, + weight: v, + completed: true, + }) + .collect::>(); + } out.sort(); if let Some(n_best) = config.n_best { out.truncate(n_best); diff --git a/divvunspell/src/speller/suggestion.rs b/divvunspell/src/speller/suggestion.rs index 6ac9500..055e638 100644 --- a/divvunspell/src/speller/suggestion.rs +++ b/divvunspell/src/speller/suggestion.rs @@ -8,11 +8,12 @@ use std::cmp::Ordering::Equal; pub struct Suggestion { pub value: SmolStr, pub weight: Weight, + pub completed: bool, } impl Suggestion { - pub fn new(value: SmolStr, weight: Weight) -> Suggestion { - Suggestion { value, weight } + pub fn new(value: SmolStr, weight: Weight, completed: bool) -> Suggestion { + Suggestion { value, weight, completed } } pub fn value(&self) -> &str { @@ -22,6 +23,10 @@ impl Suggestion { pub fn weight(&self) -> Weight { self.weight } + + pub fn completed(&self) -> bool { + self.completed + } } impl PartialOrd for Suggestion { diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index 5e2005e..d8ceb9e 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -611,7 +611,7 @@ where .lexicon() .alphabet() .string_from_symbols(&next_node.string); - log::trace!("suggesting? {}::{}", string, weight); + // log::trace!("suggesting? {}::{}", string, weight); if weight < best_weight { best_weight = weight; } @@ -633,17 +633,24 @@ where corrections: &HashMap, ) -> Vec { log::trace!("Generating sorted suggestions"); - let mut c: Vec = corrections - .into_iter() - .map(|x| Suggestion::new(x.0.clone(), *x.1)) - .collect(); - + let mut c: Vec; + if let Some(s) = &self.config.completion_marker { + c = corrections + .into_iter() + .map(|x| Suggestion::new(x.0.clone(), *x.1, x.0.ends_with(s))) + .collect(); + } + else { + c = corrections + .into_iter() + .map(|x| Suggestion::new(x.0.clone(), *x.1, true)) + .collect(); + } c.sort(); if let Some(n) = self.config.n_best { c.truncate(n); } - c } } From efa3d4a3b4175dec49a3ad2a215117a81624a6b2 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Fri, 7 Oct 2022 03:28:04 +0200 Subject: [PATCH 07/21] something or other --- divvunspell/src/speller/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index d629493..8a36aab 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -460,6 +460,7 @@ pub(crate) mod ffi { }, case_handling, node_pool_size: config.node_pool_size, + completion_marker: None, }; Ok(out) From 6aa41c614069b419ae78e2f49ba21685846e234a Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 8 Feb 2024 17:51:29 +0100 Subject: [PATCH 08/21] Make shinier --- Cargo.lock | 410 +++++++++++--------------- accuracy/src/main.rs | 2 +- divvunspell-bin/src/main.rs | 139 ++++++--- divvunspell/src/archive/meta.rs | 2 +- divvunspell/src/speller/mod.rs | 75 ++--- divvunspell/src/speller/suggestion.rs | 8 +- divvunspell/src/speller/worker.rs | 13 +- 7 files changed, 326 insertions(+), 323 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcb726d..6de6c4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,9 +94,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" [[package]] name = "atty" @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bitflags" @@ -153,9 +153,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "block-buffer" @@ -209,9 +209,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "542f33a8835a0884b006a0c3df3dadd99c0c3f296ed26c2fdc8028e01ad6230c" +checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" dependencies = [ "memchr", "serde", @@ -326,16 +326,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.48.5", + "windows-targets 0.52.0", ] [[package]] @@ -378,15 +378,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] @@ -434,36 +434,28 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg 1.1.0", - "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crypto-common" @@ -523,9 +515,9 @@ dependencies = [ [[package]] name = "curl-sys" -version = "0.4.70+curl-8.5.0" +version = "0.4.71+curl-8.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c0333d8849afe78a4c8102a429a446bfdd055832af071945520e835ae2d841e" +checksum = "c7b12a7ab780395666cb576203dc3ed6e01513754939a600b85196ccf5356bc5" dependencies = [ "cc", "libc", @@ -849,42 +841,42 @@ checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", ] [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", "futures-io", @@ -907,9 +899,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -974,9 +966,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -1042,9 +1034,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "d0c62115964e08cb8039170eb33c1d0e2388a256930279edca206fff675f82c3" [[package]] name = "http" @@ -1097,9 +1089,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ "bytes", "futures-channel", @@ -1112,7 +1104,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.10", + "socket2 0.5.5", "tokio", "tower-service", "tracing", @@ -1134,9 +1126,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.58" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1173,9 +1165,9 @@ dependencies = [ [[package]] name = "ignore" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "747ad1b4ae841a78e8aba0d63adbfbeaea26b517b63705d47856b73015d27060" +checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1" dependencies = [ "crossbeam-deque", "globset", @@ -1189,9 +1181,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.1.0" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -1264,18 +1256,18 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.66" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] @@ -1294,9 +1286,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libredox" @@ -1304,16 +1296,16 @@ version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "libc", "redox_syscall 0.4.1", ] [[package]] name = "libz-sys" -version = "1.1.12" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" dependencies = [ "cc", "libc", @@ -1329,9 +1321,9 @@ checksum = "89be94dbd775db37b46ca4f4bf5cf89adfb13ba197bfbcb69b2122848ee73c26" [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -1384,9 +1376,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memmap2" @@ -1397,15 +1389,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg 1.1.0", -] - [[package]] name = "mime" version = "0.3.17" @@ -1414,9 +1397,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -1465,28 +1448,27 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" dependencies = [ "num-traits", ] [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg 1.1.0", "num-traits", ] [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg 1.1.0", ] @@ -1497,7 +1479,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.3", + "hermit-abi 0.3.5", "libc", ] @@ -1515,9 +1497,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] @@ -1530,11 +1512,11 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.61" +version = "0.10.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b8419dc8cc6d866deb801274bba2e6f8f6108c1bb7fcc10ee5ab864931dbb45" +checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "cfg-if", "foreign-types", "libc", @@ -1551,7 +1533,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.48", ] [[package]] @@ -1562,9 +1544,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.97" +version = "0.9.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3eaad34cdd97d81de97964fc7f29e2d104f483840d906ef56daa1912338460b" +checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae" dependencies = [ "cc", "libc", @@ -1703,9 +1685,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "ppv-lite86" @@ -1749,9 +1731,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -1770,9 +1752,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -1921,9 +1903,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -1931,9 +1913,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -1979,9 +1961,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", @@ -1991,9 +1973,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -2008,15 +1990,15 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "relative-path" -version = "1.9.0" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca" +checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" [[package]] name = "reqwest" -version = "0.11.22" +version = "0.11.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" dependencies = [ "base64", "bytes", @@ -2036,9 +2018,11 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", + "sync_wrapper", "system-configuration", "tokio", "tokio-native-tls", @@ -2097,17 +2081,26 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", "windows-sys 0.52.0", ] +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64", +] + [[package]] name = "ryu" version = "1.0.16" @@ -2125,11 +2118,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2163,9 +2156,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.193" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" dependencies = [ "serde_derive", ] @@ -2184,20 +2177,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.48", ] [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", @@ -2244,9 +2237,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "smol_str" @@ -2338,15 +2331,21 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.41" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "system-configuration" version = "0.5.1" @@ -2397,22 +2396,21 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.1" +version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.4.1", "rustix", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "termcolor" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] @@ -2439,22 +2437,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.48", ] [[package]] @@ -2485,9 +2483,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.35.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", @@ -2561,7 +2559,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.48", ] [[package]] @@ -2788,9 +2786,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -2818,9 +2816,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" @@ -2905,9 +2903,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.89" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2915,24 +2913,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.89" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.48", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.39" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12" +checksum = "877b9c3f61ceea0e56331985743b13f3d25c406a7098d45180fb5f09bc19ed97" dependencies = [ "cfg-if", "js-sys", @@ -2942,9 +2940,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.89" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2952,28 +2950,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.89" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.89" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "web-sys" -version = "0.3.66" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", @@ -3021,20 +3019,11 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.51.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.45.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.42.2", + "windows-targets 0.52.0", ] [[package]] @@ -3055,21 +3044,6 @@ dependencies = [ "windows-targets 0.52.0", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.5" @@ -3100,12 +3074,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3118,12 +3086,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3136,12 +3098,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3154,12 +3110,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3172,12 +3122,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3190,12 +3134,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3208,12 +3146,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -3238,9 +3170,9 @@ dependencies = [ [[package]] name = "xattr" -version = "1.1.3" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7dae5072fe1f8db8f8d29059189ac175196e410e40ba42d5d4684ae2f750995" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", "linux-raw-sys", diff --git a/accuracy/src/main.rs b/accuracy/src/main.rs index 079d82a..46481d7 100644 --- a/accuracy/src/main.rs +++ b/accuracy/src/main.rs @@ -21,7 +21,7 @@ static CFG: SpellerConfig = SpellerConfig { beam: None, case_handling: Some(CaseHandlingConfig::default()), node_pool_size: 128, - completion_marker: None, + continuation_marker: None, }; fn load_words( diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 1a97214..21d6286 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -1,9 +1,14 @@ use std::io::{self, Read}; +use std::process; use std::{ path::{Path, PathBuf}, sync::Arc, }; +use divvunspell::speller::HfstSpeller; +use divvunspell::transducer::hfst::HfstTransducer; +use divvunspell::transducer::Transducer; +use divvunspell::vfs::Fs; use gumdrop::Options; use serde::Serialize; @@ -24,12 +29,15 @@ use divvunspell::{ trait OutputWriter { fn write_correction(&mut self, word: &str, is_correct: bool); fn write_suggestions(&mut self, word: &str, suggestions: &[Suggestion]); - fn write_analyses(&mut self, word: &str, analyses: &[Suggestion]); + fn write_input_analyses(&mut self, word: &str, analyses: &[Suggestion]); + fn write_output_analyses(&mut self, word: &str, analyses: &[Suggestion]); fn write_predictions(&mut self, predictions: &[String]); fn finish(&mut self); } -struct StdoutWriter; +struct StdoutWriter { + has_continuation_marker: Option, +} impl OutputWriter for StdoutWriter { fn write_correction(&mut self, word: &str, is_correct: bool) { @@ -41,10 +49,18 @@ impl OutputWriter for StdoutWriter { } fn write_suggestions(&mut self, _word: &str, suggestions: &[Suggestion]) { - for sugg in suggestions { - - println!("{}\t\t{} (is complete {})", sugg.value, sugg.weight, - sugg.completed); + if let Some(s) = &self.has_continuation_marker { + for sugg in suggestions { + print!("{}", sugg.value); + if !sugg.completed { + print!("{s}"); + } + println!("\t\t{}", sugg.weight); + } + } else { + for sugg in suggestions { + println!("{}\t\t{}", sugg.value, sugg.weight); + } } println!(); } @@ -54,8 +70,16 @@ impl OutputWriter for StdoutWriter { println!("{}", predictions.join(" ")); } - fn write_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { - println!("Analyses: "); + fn write_input_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { + println!("Input analyses: "); + for sugg in suggestions { + println!("{}\t\t{}", sugg.value, sugg.weight); + } + println!(); + } + + fn write_output_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { + println!("Output analyses: "); for sugg in suggestions { println!("{}\t\t{}", sugg.value, sugg.weight); } @@ -73,18 +97,27 @@ struct SuggestionRequest { } #[derive(Serialize)] +struct AnalysisRequest { + word: String, + suggestions: Vec, +} + +#[derive(Default, Serialize)] #[serde(rename_all = "camelCase")] struct JsonWriter { + #[serde(skip_serializing_if = "Vec::is_empty")] suggest: Vec, - predict: Option>, + #[serde(skip_serializing_if = "Vec::is_empty")] + predict: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + input_analysis: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + output_analysis: Vec, } impl JsonWriter { pub fn new() -> JsonWriter { - JsonWriter { - suggest: vec![], - predict: None, - } + Self::default() } } @@ -103,12 +136,21 @@ impl OutputWriter for JsonWriter { } fn write_predictions(&mut self, predictions: &[String]) { - self.predict = Some(predictions.to_vec()); + self.predict = predictions.to_vec(); } - fn write_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { - let i = self.suggest.len() - 1; - self.suggest[i].suggestions = suggestions.to_vec(); + fn write_input_analyses(&mut self, word: &str, suggestions: &[Suggestion]) { + self.input_analysis.push(AnalysisRequest { + word: word.to_string(), + suggestions: suggestions.to_vec(), + }) + } + + fn write_output_analyses(&mut self, word: &str, suggestions: &[Suggestion]) { + self.output_analysis.push(AnalysisRequest { + word: word.to_string(), + suggestions: suggestions.to_vec(), + }) } fn finish(&mut self) { @@ -120,7 +162,7 @@ fn run( speller: Arc, words: Vec, writer: &mut dyn OutputWriter, - analyse: bool, + is_analyzing: bool, is_suggesting: bool, is_always_suggesting: bool, suggest_cfg: &SpellerConfig, @@ -133,14 +175,17 @@ fn run( let suggestions = speller.clone().suggest_with_config(&word, &suggest_cfg); writer.write_suggestions(&word, &suggestions); } - if analyse { - let input_analyses = speller.clone().analyse_with_config(&word, - &suggest_cfg); - writer.write_analyses(&word, &input_analyses); - let output_analyses = - speller.clone().suggest_with_analyse_with_config(&word, - &suggest_cfg); - writer.write_analyses(&word, &output_analyses); + + if is_analyzing { + let input_analyses = speller + .clone() + .analyze_input_with_config(&word, &suggest_cfg); + writer.write_input_analyses(&word, &input_analyses); + + let output_analyses = speller + .clone() + .analyze_output_with_config(&word, &suggest_cfg); + writer.write_output_analyses(&word, &output_analyses); } } } @@ -170,14 +215,20 @@ struct SuggestArgs { #[options(help = "print help message")] help: bool, - #[options(help = "BHFST or ZHFST archive to be used", required)] - archive: PathBuf, + #[options(short = "a", help = "BHFST or ZHFST archive to be used")] + archive_path: Option, + + #[options(long = "mutator", help = "mutator to use (if archive not provided)")] + mutator_path: Option, + + #[options(long = "lexicon", help = "lexicon to use (if archive not provided)")] + lexicon_path: Option, #[options(short = "S", help = "always show suggestions even if word is correct")] always_suggest: bool, - #[options(short = "a", help = "analyse words and suggestions")] - analyse: bool, + #[options(short = "A", help = "analyze words and suggestions")] + analyze: bool, #[options(help = "maximum weight limit for suggestions")] weight: Option, @@ -185,7 +236,7 @@ struct SuggestArgs { #[options(help = "maximum number of results")] nbest: Option, - #[options(help = "Character for incomplete predictions")] + #[options(help = "character for incomplete predictions")] continuation_marker: Option, #[options( @@ -315,7 +366,7 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { if args.disable_case_handling { suggest_cfg.case_handling = None; } - suggest_cfg.completion_marker = args.continuation_marker; + suggest_cfg.continuation_marker = args.continuation_marker.clone(); if let Some(v) = args.nbest { if v == 0 { suggest_cfg.n_best = None; @@ -335,7 +386,9 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { let mut writer: Box = if args.use_json { Box::new(JsonWriter::new()) } else { - Box::new(StdoutWriter) + Box::new(StdoutWriter { + has_continuation_marker: args.continuation_marker, + }) }; let words = if args.inputs.is_empty() { @@ -353,13 +406,26 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { args.inputs.into_iter().collect() }; - let archive = load_archive(&args.archive)?; - let speller = archive.speller(); + let speller = if let Some(archive_path) = args.archive_path { + let archive = load_archive(&archive_path)?; + let speller = archive.speller(); + speller + } else if let (Some(lexicon_path), Some(mutator_path)) = (args.lexicon_path, args.mutator_path) + { + let acceptor = HfstTransducer::from_path(&Fs, lexicon_path)?; + let errmodel = HfstTransducer::from_path(&Fs, mutator_path)?; + + HfstSpeller::new(errmodel, acceptor) as _ + } else { + eprintln!("Either a BHFST or ZHFST archive must be provided, or a mutator and lexicon."); + process::exit(1); + }; + run( speller, words, &mut *writer, - args.analyse, + args.analyze, true, args.always_suggest, &suggest_cfg, @@ -443,7 +509,6 @@ fn predict(_args: PredictArgs) -> anyhow::Result<()> { fn main() -> anyhow::Result<()> { pretty_env_logger::init(); - let args = Args::parse_args_default_or_exit(); match args.command { diff --git a/divvunspell/src/archive/meta.rs b/divvunspell/src/archive/meta.rs index 9d6b47c..81cdf81 100644 --- a/divvunspell/src/archive/meta.rs +++ b/divvunspell/src/archive/meta.rs @@ -90,7 +90,7 @@ fn test_xml_parse() { se Giellatekno/Divvun/UiT fst-based speller for Northern Sami This is an fst-based speller for Northern Sami. It is based - on the normative subset of the morphological analyser for Northern Sami. + on the normative subset of the morphological analyzer for Northern Sami. The source code can be found at: https://victorio.uit.no/langtech/trunk/langs/sme/ License: GPL3+. diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 8a36aab..9d397f4 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -29,7 +29,7 @@ pub struct SpellerConfig { pub beam: Option, pub case_handling: Option, pub node_pool_size: usize, - pub completion_marker: Option, + pub continuation_marker: Option, } impl SpellerConfig { @@ -40,7 +40,7 @@ impl SpellerConfig { beam: None, case_handling: Some(CaseHandlingConfig::default()), node_pool_size: 128, - completion_marker: None, + continuation_marker: None, } } } @@ -55,19 +55,22 @@ impl CaseHandlingConfig { } } -pub trait Speller { +pub trait Speller: Analyzer { fn is_correct(self: Arc, word: &str) -> bool; fn is_correct_with_config(self: Arc, word: &str, config: &SpellerConfig) -> bool; fn suggest(self: Arc, word: &str) -> Vec; fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; -//} - -//pub trait Analyser { - fn analyse(self: Arc, word: &str) -> Vec; - fn analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; - fn suggest_with_analyse(self: Arc, word: &str) -> Vec; - fn suggest_with_analyse_with_config(self: Arc, word: &str, config: - &SpellerConfig) -> Vec; +} + +pub trait Analyzer { + fn analyze_input(self: Arc, word: &str) -> Vec; + fn analyze_input_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; + fn analyze_output(self: Arc, word: &str) -> Vec; + fn analyze_output_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec; } impl Speller for HfstSpeller @@ -133,47 +136,48 @@ where self.suggest_single(word, config) } } -//} - -//impl Analyser for HfstSpeller -//where -// F: crate::vfs::File + Send, -// T: Transducer + Send, -// U: Transducer + Send, -//{ +} + +impl Analyzer for HfstSpeller +where + F: crate::vfs::File + Send, + T: Transducer + Send, + U: Transducer + Send, +{ #[allow(clippy::wrong_self_convention)] - fn analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { + fn analyze_input_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { if word.len() == 0 { return vec![]; } let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); - log::trace!("Beginning analyse with config in mod"); - worker.analyse() - - //vec![] + log::trace!("Beginning analyze with config in mod"); + worker.analyze() } #[inline] - fn analyse(self: Arc, word: &str) -> Vec { - self.analyse_with_config(word, &SpellerConfig::default()) + fn analyze_input(self: Arc, word: &str) -> Vec { + self.analyze_input_with_config(word, &SpellerConfig::default()) } #[inline] - fn suggest_with_analyse(self: Arc, word: &str) -> Vec { - self.suggest_with_analyse_with_config(word, &SpellerConfig::default()) + fn analyze_output(self: Arc, word: &str) -> Vec { + self.analyze_output_with_config(word, &SpellerConfig::default()) } - fn suggest_with_analyse_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { + fn analyze_output_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec { if word.len() == 0 { return vec![]; } - log::trace!("Beginning analyse suggest with config in mod"); + log::trace!("Beginning analyze suggest with config in mod"); let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); worker.suggest() - } } @@ -324,7 +328,7 @@ where return vec![]; } let mut out: Vec; - if let Some(s) = &config.completion_marker { + if let Some(s) = &config.continuation_marker { out = best .into_iter() .map(|(k, v)| Suggestion { @@ -332,9 +336,8 @@ where weight: v, completed: !k.ends_with(s), }) - .collect::>(); - } - else { + .collect::>(); + } else { out = best .into_iter() .map(|(k, v)| Suggestion { @@ -342,7 +345,7 @@ where weight: v, completed: true, }) - .collect::>(); + .collect::>(); } out.sort(); if let Some(n_best) = config.n_best { diff --git a/divvunspell/src/speller/suggestion.rs b/divvunspell/src/speller/suggestion.rs index 055e638..bd9786b 100644 --- a/divvunspell/src/speller/suggestion.rs +++ b/divvunspell/src/speller/suggestion.rs @@ -13,7 +13,11 @@ pub struct Suggestion { impl Suggestion { pub fn new(value: SmolStr, weight: Weight, completed: bool) -> Suggestion { - Suggestion { value, weight, completed } + Suggestion { + value, + weight, + completed, + } } pub fn value(&self) -> &str { @@ -23,7 +27,7 @@ impl Suggestion { pub fn weight(&self) -> Weight { self.weight } - + pub fn completed(&self) -> bool { self.completed } diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index d8ceb9e..635b807 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -502,8 +502,8 @@ where false } - pub(crate) fn analyse(&self) -> Vec { - log::trace!("Beginning analyse"); + pub(crate) fn analyze(&self) -> Vec { + log::trace!("Beginning analyze"); let pool = Pool::with_size_and_max(0, 0); let mut nodes = speller_start_node(&pool, self.state_size() as usize); let mut lookups = HashMap::new(); @@ -536,7 +536,6 @@ where *entry = weight; } } - } analyses = self.generate_sorted_suggestions(&lookups); } @@ -623,8 +622,9 @@ where *entry = weight; } } + + suggestions = self.generate_sorted_suggestions(&corrections); } - suggestions = self.generate_sorted_suggestions(&corrections); suggestions } @@ -634,13 +634,12 @@ where ) -> Vec { log::trace!("Generating sorted suggestions"); let mut c: Vec; - if let Some(s) = &self.config.completion_marker { + if let Some(s) = &self.config.continuation_marker { c = corrections .into_iter() .map(|x| Suggestion::new(x.0.clone(), *x.1, x.0.ends_with(s))) .collect(); - } - else { + } else { c = corrections .into_iter() .map(|x| Suggestion::new(x.0.clone(), *x.1, true)) From a5170ccfa98830a34b62b7d07a31f695b0cd2b05 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Sat, 2 Mar 2024 14:43:26 +0100 Subject: [PATCH 09/21] Make completed optional --- divvunspell-bin/src/main.rs | 2 +- divvunspell/src/speller/mod.rs | 16 ++++++++++++---- divvunspell/src/speller/suggestion.rs | 7 ++++--- divvunspell/src/speller/worker.rs | 6 +++--- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 21d6286..dbc362e 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -52,7 +52,7 @@ impl OutputWriter for StdoutWriter { if let Some(s) = &self.has_continuation_marker { for sugg in suggestions { print!("{}", sugg.value); - if !sugg.completed { + if sugg.completed == Some(true) { print!("{s}"); } println!("\t\t{}", sugg.weight); diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 9d397f4..16d0782 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -64,7 +64,11 @@ pub trait Speller: Analyzer { pub trait Analyzer { fn analyze_input(self: Arc, word: &str) -> Vec; - fn analyze_input_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; + fn analyze_input_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec; fn analyze_output(self: Arc, word: &str) -> Vec; fn analyze_output_with_config( self: Arc, @@ -145,7 +149,11 @@ where U: Transducer + Send, { #[allow(clippy::wrong_self_convention)] - fn analyze_input_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { + fn analyze_input_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec { if word.len() == 0 { return vec![]; } @@ -334,7 +342,7 @@ where .map(|(k, v)| Suggestion { value: k.clone(), weight: v, - completed: !k.ends_with(s), + completed: Some(!k.ends_with(s)), }) .collect::>(); } else { @@ -343,7 +351,7 @@ where .map(|(k, v)| Suggestion { value: k, weight: v, - completed: true, + completed: None, }) .collect::>(); } diff --git a/divvunspell/src/speller/suggestion.rs b/divvunspell/src/speller/suggestion.rs index bd9786b..38c847f 100644 --- a/divvunspell/src/speller/suggestion.rs +++ b/divvunspell/src/speller/suggestion.rs @@ -8,11 +8,12 @@ use std::cmp::Ordering::Equal; pub struct Suggestion { pub value: SmolStr, pub weight: Weight, - pub completed: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub completed: Option, } impl Suggestion { - pub fn new(value: SmolStr, weight: Weight, completed: bool) -> Suggestion { + pub fn new(value: SmolStr, weight: Weight, completed: Option) -> Suggestion { Suggestion { value, weight, @@ -28,7 +29,7 @@ impl Suggestion { self.weight } - pub fn completed(&self) -> bool { + pub fn completed(&self) -> Option { self.completed } } diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index 635b807..287e470 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -622,7 +622,7 @@ where *entry = weight; } } - + suggestions = self.generate_sorted_suggestions(&corrections); } suggestions @@ -637,12 +637,12 @@ where if let Some(s) = &self.config.continuation_marker { c = corrections .into_iter() - .map(|x| Suggestion::new(x.0.clone(), *x.1, x.0.ends_with(s))) + .map(|x| Suggestion::new(x.0.clone(), *x.1, Some(x.0.ends_with(s)))) .collect(); } else { c = corrections .into_iter() - .map(|x| Suggestion::new(x.0.clone(), *x.1, true)) + .map(|x| Suggestion::new(x.0.clone(), *x.1, None)) .collect(); } c.sort(); From 17f5b2441ae48769736398a1502beee603e2035c Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Fri, 10 May 2024 19:58:20 +0200 Subject: [PATCH 10/21] WIP --- Cargo.lock | 702 +++++++++++++++------------------ accuracy/Cargo.toml | 2 +- divvunspell-bin/Cargo.toml | 2 +- divvunspell/Cargo.toml | 22 +- divvunspell/src/archive/mod.rs | 2 +- divvunspell/src/speller/mod.rs | 2 +- 6 files changed, 336 insertions(+), 396 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6de6c4b..bb0727a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,15 +35,27 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ "getrandom", "once_cell", "version_check", ] +[[package]] +name = "ahash" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b79b82693f705137f8fb9b37871d99e4f9a7df12b917eed79c3d3954830a60b" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.2" @@ -68,6 +80,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -92,11 +110,59 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] name = "atty" @@ -109,15 +175,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "autocfg" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78" -dependencies = [ - "autocfg 1.1.0", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -176,7 +233,7 @@ dependencies = [ "comde", "fastvlq", "log", - "memmap2", + "memmap2 0.5.10", "pathdiff", "relative-path", "serde_json", @@ -209,9 +266,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", "serde", @@ -219,9 +276,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" [[package]] name = "byteorder" @@ -267,7 +324,7 @@ dependencies = [ "glob", "indicatif 0.16.2", "log", - "rand 0.8.5", + "rand", "reqwest", "serde", "serde_json", @@ -281,19 +338,16 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc" dependencies = [ - "jobserver", "libc", ] [[package]] name = "cffi" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5e4ef6239aac8a2d645d60f08cc345889659f64c815ce204de14e6ffc2b52ed" +version = "0.2.0-dev" dependencies = [ "cffi-impl", "libc", @@ -302,20 +356,18 @@ dependencies = [ [[package]] name = "cffi-impl" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5991ed1ca79f668096f267671e6035156f23871a8a2dbd88a38dd43a8c73c68" +version = "0.2.0-dev" dependencies = [ "ctor", "darling", - "heck", + "heck 0.4.1", "log", "phf", "phf_codegen", "pretty_env_logger", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] @@ -326,16 +378,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.33" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -354,13 +406,10 @@ dependencies = [ ] [[package]] -name = "cloudabi" -version = "0.0.3" +name = "colorchoice" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -dependencies = [ - "bitflags 1.3.2", -] +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "comde" @@ -425,9 +474,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -490,34 +539,34 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.26" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" dependencies = [ "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] name = "curl" -version = "0.4.44" +version = "0.4.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "509bd11746c7ac09ebd19f0b17782eae80aadee26237658a6b4808afb5c11a22" +checksum = "1e2161dd6eba090ff1594084e95fd67aeccf04382ffea77999ea94ed42ec67b6" dependencies = [ "curl-sys", "libc", "openssl-probe", "openssl-sys", "schannel", - "socket2 0.4.10", - "winapi", + "socket2", + "windows-sys 0.52.0", ] [[package]] name = "curl-sys" -version = "0.4.71+curl-8.6.0" +version = "0.4.72+curl-8.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7b12a7ab780395666cb576203dc3ed6e01513754939a600b85196ccf5356bc5" +checksum = "29cbdc8314c447d11e8fd156dcdd031d9e02a7a976163e396b548c03153bc9ea" dependencies = [ "cc", "libc", @@ -525,14 +574,14 @@ dependencies = [ "openssl-sys", "pkg-config", "vcpkg", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "darling" -version = "0.10.2" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" dependencies = [ "darling_core", "darling_macro", @@ -540,27 +589,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.10.2" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", - "strsim 0.9.3", - "syn 1.0.109", + "strsim 0.10.0", + "syn 2.0.52", ] [[package]] name = "darling_macro" -version = "0.10.2" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] @@ -618,17 +667,17 @@ dependencies = [ "byteorder", "cffi", "eieio", - "env_logger 0.9.3", + "env_logger 0.11.2", "flatbuffers", "fs_extra", "globwalk", - "hashbrown 0.11.2", - "itertools", + "hashbrown 0.14.3", + "itertools 0.12.1", "language-tags", "libc", "lifeguard", "log", - "memmap2", + "memmap2 0.9.4", "parking_lot", "pathos", "rust-bert", @@ -637,7 +686,7 @@ dependencies = [ "serde-xml-rs", "serde_json", "smol_str", - "strsim 0.10.0", + "strsim 0.11.0", "tch", "tempfile", "thiserror", @@ -671,9 +720,9 @@ checksum = "0f3dd87dfc333e61e1be9ec6d6a4d946a98ece074da331eeb5d1c0b98b47ee34" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "encode_unicode" @@ -691,31 +740,41 @@ dependencies = [ ] [[package]] -name = "env_logger" -version = "0.7.1" +name = "env_filter" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" dependencies = [ - "atty", - "humantime 1.3.0", "log", "regex", - "termcolor", ] [[package]] name = "env_logger" -version = "0.9.3" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" dependencies = [ - "atty", - "humantime 2.1.0", + "humantime", + "is-terminal", "log", "regex", "termcolor", ] +[[package]] +name = "env_logger" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -755,7 +814,7 @@ checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "windows-sys 0.52.0", ] @@ -833,12 +892,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - [[package]] name = "futures-channel" version = "0.3.30" @@ -935,11 +988,11 @@ dependencies = [ [[package]] name = "globwalk" -version = "0.8.1" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.2", "ignore", "walkdir", ] @@ -985,19 +1038,9 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -dependencies = [ - "ahash", - "serde", -] +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "hashbrown" @@ -1005,7 +1048,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash", + "ahash 0.7.8", ] [[package]] @@ -1013,6 +1056,11 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash 0.8.10", + "allocator-api2", + "serde", +] [[package]] name = "heck" @@ -1023,6 +1071,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -1034,9 +1088,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.5" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c62115964e08cb8039170eb33c1d0e2388a256930279edca206fff675f82c3" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "http" @@ -1072,15 +1126,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] - [[package]] name = "humantime" version = "2.1.0" @@ -1104,7 +1149,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.5", + "socket2", "tokio", "tower-service", "tracing", @@ -1181,9 +1226,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.2" +version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" +checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -1214,15 +1259,6 @@ dependencies = [ "regex", ] -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "ipnet" version = "2.9.0" @@ -1239,6 +1275,17 @@ dependencies = [ "smallvec", ] +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi 0.3.9", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "itertools" version = "0.10.5" @@ -1249,19 +1296,19 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.10" +name = "itertools" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] [[package]] -name = "jobserver" -version = "0.1.28" +name = "itoa" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" -dependencies = [ - "libc", -] +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" @@ -1298,7 +1345,7 @@ checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" dependencies = [ "bitflags 2.4.2", "libc", - "redox_syscall 0.4.1", + "redox_syscall", ] [[package]] @@ -1331,15 +1378,15 @@ version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ - "autocfg 1.1.0", + "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lzma-sys" @@ -1370,7 +1417,7 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" dependencies = [ - "autocfg 1.1.0", + "autocfg", "rawpointer", ] @@ -1389,6 +1436,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -1406,9 +1462,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "wasi 0.11.0+wasi-snapshot-preview1", @@ -1470,7 +1526,7 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ - "autocfg 1.1.0", + "autocfg", ] [[package]] @@ -1479,7 +1535,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.5", + "hermit-abi 0.3.9", "libc", ] @@ -1512,9 +1568,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.63" +version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ "bitflags 2.4.2", "cfg-if", @@ -1533,7 +1589,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.52", ] [[package]] @@ -1544,9 +1600,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.99" +version = "0.9.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae" +checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" dependencies = [ "cc", "libc", @@ -1571,27 +1627,25 @@ checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" [[package]] name = "parking_lot" -version = "0.11.2" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ - "instant", "lock_api", "parking_lot_core", ] [[package]] name = "parking_lot_core" -version = "0.8.6" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", - "instant", "libc", - "redox_syscall 0.2.16", + "redox_syscall", "smallvec", - "winapi", + "windows-targets 0.48.5", ] [[package]] @@ -1635,18 +1689,18 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phf" -version = "0.7.24" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.7.24" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ "phf_generator", "phf_shared", @@ -1654,19 +1708,19 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.7.24" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ "phf_shared", - "rand 0.6.5", + "rand", ] [[package]] name = "phf_shared" -version = "0.7.24" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", ] @@ -1685,9 +1739,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "ppv-lite86" @@ -1697,11 +1751,11 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "pretty_env_logger" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" dependencies = [ - "env_logger 0.7.1", + "env_logger 0.10.2", "log", ] @@ -1744,12 +1798,6 @@ version = "2.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" version = "1.0.35" @@ -1759,25 +1807,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" -dependencies = [ - "autocfg 0.1.8", - "libc", - "rand_chacha 0.1.1", - "rand_core 0.4.2", - "rand_hc", - "rand_isaac", - "rand_jitter", - "rand_os", - "rand_pcg", - "rand_xorshift", - "winapi", -] - [[package]] name = "rand" version = "0.8.5" @@ -1785,18 +1814,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -dependencies = [ - "autocfg 0.1.8", - "rand_core 0.3.1", + "rand_chacha", + "rand_core", ] [[package]] @@ -1806,24 +1825,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", + "rand_core", ] -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - [[package]] name = "rand_core" version = "0.6.4" @@ -1833,68 +1837,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "rand_hc" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rand_isaac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rand_jitter" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" -dependencies = [ - "libc", - "rand_core 0.4.2", - "winapi", -] - -[[package]] -name = "rand_os" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" -dependencies = [ - "cloudabi", - "fuchsia-cprng", - "libc", - "rand_core 0.4.2", - "rdrand", - "winapi", -] - -[[package]] -name = "rand_pcg" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" -dependencies = [ - "autocfg 0.1.8", - "rand_core 0.4.2", -] - -[[package]] -name = "rand_xorshift" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -dependencies = [ - "rand_core 0.3.1", -] - [[package]] name = "rawpointer" version = "0.2.1" @@ -1903,9 +1845,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ "either", "rayon-core", @@ -1921,24 +1863,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -2061,7 +1985,7 @@ checksum = "196e3b77b07fd5bfcbc8187ecaef5d5931820d9abd6c3fe0a9dc6d3ddb035d72" dependencies = [ "csv", "hashbrown 0.12.3", - "itertools", + "itertools 0.10.5", "lazy_static", "protobuf", "rayon", @@ -2103,9 +2027,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -2156,18 +2080,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde-xml-rs" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65162e9059be2f6a3421ebbb4fef3e74b7d9e7c60c50a0e292c6239f19f1edfa" +checksum = "fb3aa78ecda1ebc9ec9847d5d3aba7d618823446a049ba2491940506da6e2782" dependencies = [ "log", "serde", @@ -2177,20 +2101,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.52", ] [[package]] name = "serde_json" -version = "1.0.113" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -2222,9 +2146,9 @@ dependencies = [ [[package]] name = "siphasher" -version = "0.2.3" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "slab" @@ -2232,7 +2156,7 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ - "autocfg 1.1.0", + "autocfg", ] [[package]] @@ -2243,9 +2167,9 @@ checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "smol_str" -version = "0.1.24" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9" +checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49" dependencies = [ "serde", ] @@ -2258,22 +2182,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "socket2" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2284,15 +2198,15 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "strsim" -version = "0.9.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "structopt" @@ -2311,7 +2225,7 @@ version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ - "heck", + "heck 0.3.3", "proc-macro-error", "proc-macro2", "quote", @@ -2331,9 +2245,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.48" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -2388,7 +2302,7 @@ dependencies = [ "lazy_static", "libc", "ndarray", - "rand 0.8.5", + "rand", "thiserror", "torch-sys", "zip", @@ -2396,9 +2310,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", @@ -2437,22 +2351,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.52", ] [[package]] @@ -2493,7 +2407,7 @@ dependencies = [ "mio", "num_cpus", "pin-project-lite", - "socket2 0.5.5", + "socket2", "windows-sys 0.48.0", ] @@ -2559,7 +2473,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.52", ] [[package]] @@ -2798,9 +2712,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] @@ -2843,6 +2757,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "uuid" version = "0.8.2" @@ -2872,9 +2792,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -2922,7 +2842,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.52", "wasm-bindgen-shared", ] @@ -2956,7 +2876,7 @@ checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.52", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3023,7 +2943,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -3041,7 +2961,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -3061,17 +2981,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -3082,9 +3002,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -3094,9 +3014,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -3106,9 +3026,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -3118,9 +3038,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -3130,9 +3050,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" @@ -3142,9 +3062,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -3154,9 +3074,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winreg" @@ -3194,6 +3114,26 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", +] + [[package]] name = "zip" version = "0.5.13" diff --git a/accuracy/Cargo.toml b/accuracy/Cargo.toml index eda84e1..46e4ff5 100644 --- a/accuracy/Cargo.toml +++ b/accuracy/Cargo.toml @@ -15,7 +15,7 @@ rayon = { version = "1.4.0" } indicatif = { version = "0.15", features = ["with_rayon"] } # box-format = { git = "https://github.com/bbqsrc/box", branch = "master" } # tempdir = "0.3.7" -pretty_env_logger = "0.4.0" +pretty_env_logger = "0.5.0" # ctor = "*" # gumdrop = "0.8.0" # thiserror = "1.0.20" diff --git a/divvunspell-bin/Cargo.toml b/divvunspell-bin/Cargo.toml index 72dd7b6..2c6d18d 100644 --- a/divvunspell-bin/Cargo.toml +++ b/divvunspell-bin/Cargo.toml @@ -16,7 +16,7 @@ serde = { version = "1.0.116", features = ["derive"] } serde_json = "1.0.57" divvunspell = { version = "1.0.0-beta.3", features = ["internal_convert", "compression"], path = "../divvunspell" } box-format = { version = "0.3.2", features = ["reader"], default-features = false } -pretty_env_logger = "0.4.0" +pretty_env_logger = "0.5.0" gumdrop = "0.8.0" anyhow = "1.0.32" structopt = "0.3.17" diff --git a/divvunspell/Cargo.toml b/divvunspell/Cargo.toml index 9df4ee4..b2eaba1 100644 --- a/divvunspell/Cargo.toml +++ b/divvunspell/Cargo.toml @@ -13,28 +13,28 @@ crate-type = ["rlib", "staticlib", "cdylib"] [dependencies] libc = "0.2" -memmap2 = "0.5.0" +memmap2 = "0.9.4" byteorder = "1.3.4" serde = { version = "1.0.116", features = ["derive"] } serde_json = "1.0.57" -serde-xml-rs = { version = "0.5.0", default-features = false } +serde-xml-rs = { version = "0.6.0", default-features = false } zip = { version = "0.5", default-features = false } unic-segment = "0.9.0" unic-char-range = "0.9.0" unic-char-property = "0.9.0" unic-ucd-category = "0.9.0" -parking_lot = "0.11.2" -hashbrown = { version = "0.11", features = ["serde"] } +parking_lot = "0.12.1" +hashbrown = { version = "0.14.3", features = ["serde"] } lifeguard = "0.6.1" -smol_str = { version = "0.1.16", features = ["serde"] } +smol_str = { version = "0.2.1", features = ["serde"] } box-format = { version = "0.3.2", features = ["reader"], default-features = false } -itertools = "0.10" -strsim = "0.10.0" +itertools = "0.12.1" +strsim = "0.11.0" log = "0.4.11" -cffi = "0.1.6" +cffi = { path = "../../../github/cffi", optional = true } #{ git = "https://github.com/cffi-rs/cffi", optional = true } unic-ucd-common = "0.9.0" flatbuffers = { version = "0.6.1", optional = true } -env_logger = { version = "0.9", optional = true } +env_logger = { version = "0.11.2", optional = true } thiserror = "1.0.20" tch = { version = "0.6.1", optional = true } rust-bert = { version = "0.17.0", optional = true } @@ -44,7 +44,7 @@ fs_extra = "1.2.0" eieio = "1.0.0" pathos = "0.3.0" language-tags = "0.3.2" -globwalk = "0.8.1" +globwalk = "0.9.1" [features] compression = ["zip/deflate"] @@ -53,4 +53,4 @@ gpt2 = ["tch", "rust-bert", "rust_tokenizers"] # Internal features: unstable, not for external use! internal_convert = [] -internal_ffi = ["flatbuffers", "logging"] +internal_ffi = ["flatbuffers", "logging", "cffi"] diff --git a/divvunspell/src/archive/mod.rs b/divvunspell/src/archive/mod.rs index cea3da4..a56cf54 100644 --- a/divvunspell/src/archive/mod.rs +++ b/divvunspell/src/archive/mod.rs @@ -79,7 +79,7 @@ pub(crate) mod ffi { use cffi::{FromForeign, ToForeign}; use std::error::Error; - #[cffi::marshal(return_marshaler = "cffi::ArcMarshaler::")] + #[cffi::marshal(return_marshaler = cffi::ArcMarshaler::)] pub extern "C" fn divvun_speller_archive_open( #[marshal(cffi::PathBufMarshaler)] path: std::path::PathBuf, ) -> Result, Box> { diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 16d0782..334f54f 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -471,7 +471,7 @@ pub(crate) mod ffi { }, case_handling, node_pool_size: config.node_pool_size, - completion_marker: None, + continuation_marker: None, }; Ok(out) From f4f6c63be2fc1cb6f95d746d66134dca2667fe35 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Tue, 25 Mar 2025 10:32:00 +0100 Subject: [PATCH 11/21] build fixes and docu --- divvunspell-bin/src/main.rs | 6 +++--- divvunspell/src/archive/boxf.rs | 6 +++++- divvunspell/src/archive/mod.rs | 3 ++- divvunspell/src/archive/zip.rs | 6 +++++- divvunspell/src/speller/mod.rs | 6 ++++++ divvunspell/src/speller/suggestion.rs | 8 ++++++++ 6 files changed, 29 insertions(+), 6 deletions(-) diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 74a0ee5..3a55d71 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -22,7 +22,7 @@ use divvunspell::{ boxf::ThfstBoxSpellerArchive, error::SpellerArchiveError, BoxSpellerArchive, SpellerArchive, ZipSpellerArchive, }, - speller::{suggestion::Suggestion, Speller, SpellerConfig}, + speller::{suggestion::Suggestion, Analyzer, SpellerConfig}, tokenizer::Tokenize, }; @@ -159,7 +159,7 @@ impl OutputWriter for JsonWriter { } fn run( - speller: Arc, + speller: Arc, words: Vec, writer: &mut dyn OutputWriter, is_analyzing: bool, @@ -418,7 +418,7 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { let speller = if let Some(archive_path) = args.archive_path { let archive = load_archive(&archive_path)?; - let speller = archive.speller(); + let speller = archive.analyser(); speller } else if let (Some(lexicon_path), Some(mutator_path)) = (args.lexicon_path, args.mutator_path) { diff --git a/divvunspell/src/archive/boxf.rs b/divvunspell/src/archive/boxf.rs index f6b6c05..e2328e2 100644 --- a/divvunspell/src/archive/boxf.rs +++ b/divvunspell/src/archive/boxf.rs @@ -11,7 +11,7 @@ use super::{error::PredictorArchiveError, meta::PredictorMetadata, PredictorArch use super::error::SpellerArchiveError; use super::{meta::SpellerMetadata, SpellerArchive}; -use crate::speller::{HfstSpeller, Speller}; +use crate::speller::{HfstSpeller, Speller, Analyzer}; use crate::transducer::{ thfst::{MemmapThfstChunkedTransducer, MemmapThfstTransducer}, Transducer, @@ -97,6 +97,10 @@ where self.speller.clone() } + fn analyser(&self) -> Arc { + self.speller.clone() + } + fn metadata(&self) -> Option<&SpellerMetadata> { self.metadata.as_ref() } diff --git a/divvunspell/src/archive/mod.rs b/divvunspell/src/archive/mod.rs index 9fec66f..10d3631 100644 --- a/divvunspell/src/archive/mod.rs +++ b/divvunspell/src/archive/mod.rs @@ -16,7 +16,7 @@ use self::{ error::SpellerArchiveError, meta::{PredictorMetadata, SpellerMetadata}, }; -use crate::{predictor::Predictor, speller::Speller}; +use crate::{predictor::Predictor, speller::{Speller, Analyzer}}; pub(crate) struct TempMmap { mmap: Arc, @@ -48,6 +48,7 @@ pub trait SpellerArchive { /// retrieve spell-checker. fn speller(&self) -> Arc; + fn analyser(&self) -> Arc; /// retrieve metadata. fn metadata(&self) -> Option<&SpellerMetadata>; } diff --git a/divvunspell/src/archive/zip.rs b/divvunspell/src/archive/zip.rs index 5c763cd..48d7cc9 100644 --- a/divvunspell/src/archive/zip.rs +++ b/divvunspell/src/archive/zip.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use super::error::SpellerArchiveError; use super::meta::SpellerMetadata; use super::{MmapRef, SpellerArchive, TempMmap}; -use crate::speller::{HfstSpeller, Speller}; +use crate::speller::{HfstSpeller, Speller, Analyzer}; use crate::transducer::hfst::HfstTransducer; pub type HfstZipSpeller = @@ -103,6 +103,10 @@ impl SpellerArchive for ZipSpellerArchive { self.speller.clone() } + fn analyser(&self) -> Arc { + self.speller.clone() + } + fn metadata(&self) -> Option<&SpellerMetadata> { Some(&self.metadata) } diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 65b90f0..9e85b37 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -40,6 +40,7 @@ pub struct SpellerConfig { pub reweight: Option, /// some parallel stuff? pub node_pool_size: usize, + /// used when suggesting unfinished word parts pub continuation_marker: Option, /// whether we try to recase mispelt word before other suggestions pub recase: bool, @@ -90,14 +91,19 @@ pub trait Speller { fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; } +/// can provide in-depth analyses along with suggestions pub trait Analyzer { + /// analyse the input word form fn analyze_input(self: Arc, word: &str) -> Vec; + /// analyse input word form with recasing and stuff from configs fn analyze_input_with_config( self: Arc, word: &str, config: &SpellerConfig, ) -> Vec; + /// analyse the suggested word forms fn analyze_output(self: Arc, word: &str) -> Vec; + /// analyse the suggested word forms with recasing and stuff from configs fn analyze_output_with_config( self: Arc, word: &str, diff --git a/divvunspell/src/speller/suggestion.rs b/divvunspell/src/speller/suggestion.rs index 5b6a98a..f9fff9d 100644 --- a/divvunspell/src/speller/suggestion.rs +++ b/divvunspell/src/speller/suggestion.rs @@ -6,14 +6,19 @@ use std::cmp::Ordering; use std::cmp::Ordering::Equal; #[derive(Clone, Debug, Serialize, Deserialize)] +/// Suggestion for a spelling correction pub struct Suggestion { + /// the suggested word-form pub value: SmolStr, + /// total penalty weight of the word-form pub weight: Weight, + /// whether the word is completed or partial #[serde(skip_serializing_if = "Option::is_none")] pub completed: Option, } impl Suggestion { + /// creates a spelling correction suggestion pub fn new(value: SmolStr, weight: Weight, completed: Option) -> Suggestion { Suggestion { value, @@ -22,14 +27,17 @@ impl Suggestion { } } + /// gets the suggested word-form pub fn value(&self) -> &str { &self.value } + /// gets the penalty weight of the suggestion pub fn weight(&self) -> Weight { self.weight } + /// returns whether this suggestion is a full word or partial pub fn completed(&self) -> Option { self.completed } From 14e710e0188e63912bebe0c48395a56f8a9e5a9e Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Tue, 25 Mar 2025 14:44:51 +0100 Subject: [PATCH 12/21] fix input analysis --- divvunspell/src/speller/mod.rs | 2 +- divvunspell/src/speller/worker.rs | 37 +++++++++++++------------------ 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 9e85b37..e984b7e 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -92,7 +92,7 @@ pub trait Speller { } /// can provide in-depth analyses along with suggestions -pub trait Analyzer { +pub trait Analyzer: Speller { /// analyse the input word form fn analyze_input(self: Arc, word: &str) -> Vec; /// analyse input word form with recasing and stuff from configs diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index fba1623..e357351 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -508,40 +508,35 @@ where log::trace!("Beginning analyze"); let pool = Pool::with_size_and_max(0, 0); let mut nodes = speller_start_node(&pool, self.state_size() as usize); + log::trace!("beginning analyze {:?}", self.input); let mut lookups = HashMap::new(); let mut analyses: Vec = vec![]; - let best_weight = self.config.max_weight.unwrap_or(f32::MAX); - while let Some(next_node) = nodes.pop() { - let max_weight = self.update_weight_limit(best_weight, &analyses); - - self.lexicon_epsilons(&pool, max_weight, &next_node, &mut nodes); - self.lexicon_consume(&pool, max_weight, &next_node, &mut nodes); - if self.speller.lexicon().is_final(next_node.lexicon_state) { - let weight = next_node.weight() - + self - .speller - .lexicon() - .final_weight(next_node.lexicon_state) - .unwrap(); - + if next_node.input_state as usize == self.input.len() + && self.speller.lexicon().is_final(next_node.lexicon_state) + { let string = self .speller .lexicon() .alphabet() .string_from_symbols(&next_node.string); - - { - let entry = lookups.entry(string).or_insert(weight); - - if *entry > weight { - *entry = weight; - } + let weight = next_node.weight() + + self + .speller + .lexicon() + .final_weight(next_node.lexicon_state) + .unwrap(); + let entry = lookups.entry(string).or_insert(weight); + if *entry > weight { + *entry = weight; } } + self.lexicon_epsilons(&pool, f32::INFINITY, &next_node, &mut nodes); + self.lexicon_consume(&pool, f32::INFINITY, &next_node, &mut nodes); analyses = self.generate_sorted_suggestions(&lookups); } analyses + } pub(crate) fn suggest(&self) -> Vec { From ede60355cbda8ffd6cf7f90d7d74e3bf0ea47749 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Wed, 26 Mar 2025 10:27:02 +0100 Subject: [PATCH 13/21] hack in some correcting mode --- divvunspell/src/speller/mod.rs | 15 ++++++++++----- divvunspell/src/speller/worker.rs | 28 ++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index e984b7e..80a37fe 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -140,7 +140,8 @@ where log::debug!("is_correct_with_config: ‘{}’ ~ {:?}?; config: {:?}", word, words, config); for word in std::iter::once(word.into()).chain(words.into_iter()) { - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + let worker = SpellerWorker::new(self.clone(), + self.to_input_vec(&word), config.clone(), false); if worker.is_correct() { return true; @@ -193,7 +194,8 @@ where return vec![]; } - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + let worker = SpellerWorker::new(self.clone(), + self.to_input_vec(&word), config.clone(), false); log::trace!("Beginning analyze with config in mod"); worker.analyze() @@ -218,7 +220,8 @@ where return vec![]; } log::trace!("Beginning analyze suggest with config in mod"); - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); + let worker = SpellerWorker::new(self.clone(), + self.to_input_vec(word), config.clone(), false); worker.suggest() } @@ -288,7 +291,8 @@ where } fn suggest_single(self: Arc, word: &str, config: &SpellerConfig) -> Vec { - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); + let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), + config.clone(), true); log::trace!("suggesting single {}", word); worker.suggest() @@ -313,7 +317,8 @@ where for word in std::iter::once(&original_input).chain(words.iter()) { log::trace!("suggesting for word {}", word); - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + let worker = SpellerWorker::new(self.clone(), + self.to_input_vec(&word), config.clone(), true); let mut suggestions = worker.suggest(); match mutation { diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index e357351..73e9c15 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -23,6 +23,7 @@ pub struct SpellerWorker, U: Transducer speller: Arc>, input: Vec, config: SpellerConfig, + mode_correcting: bool, } #[allow(clippy::too_many_arguments)] @@ -37,11 +38,13 @@ where speller: Arc>, input: Vec, config: SpellerConfig, + mode_correcting: bool, ) -> SpellerWorker { SpellerWorker { speller, input, config, + mode_correcting, } } @@ -70,7 +73,12 @@ where if self .is_under_weight_limit(max_weight, next_node.weight() + transition_weight) { - let new_node = next_node.update_lexicon(pool, transition); + let new_node = if self.mode_correcting { + next_node.update_lexicon(pool, + transition.clone_with_epsilon_symbol()) + } else { + next_node.update_lexicon(pool, transition) + }; output_nodes.push(new_node); } } else { @@ -220,15 +228,27 @@ where ); if is_under_weight_limit { - let new_node = next_node.update( + let new_node = if self.mode_correcting { + next_node.update( + pool, + input_sym, + Some(next_node.input_state + input_increment as + u32), + mutator_state, + noneps_trans.target().unwrap(), + noneps_trans.weight().unwrap() + mutator_weight, + ) + + } else { + next_node.update( pool, sym, Some(next_node.input_state + input_increment as u32), mutator_state, noneps_trans.target().unwrap(), noneps_trans.weight().unwrap() + mutator_weight, - ); - + ) + }; output_nodes.push(new_node); } } From aa4802227961395c37c9e99c6e5905dd1fac58bc Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Thu, 22 May 2025 17:47:24 +0200 Subject: [PATCH 14/21] use metadata to config --- divvunspell-bin/src/main.rs | 37 ++++++++++++++++++++------------- divvunspell/src/archive/meta.rs | 32 +++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 1538382..0daaa22 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -371,14 +371,35 @@ fn load_archive(path: &Path) -> Result, SpellerArchiveEr } fn suggest(args: SuggestArgs) -> anyhow::Result<()> { + // 1. default config let mut suggest_cfg = SpellerConfig::default(); + let speller = if let Some(archive_path) = args.archive_path { + let archive = load_archive(&archive_path)?; + // 2. config from metadata + if let Some(metadata) = archive.metadata() { + if let Some(continuation) = &metadata.acceptor.continuation { + suggest_cfg.continuation_marker = Some(continuation.clone()); + } + } + let speller = archive.analyser(); + speller + } else if let (Some(lexicon_path), Some(mutator_path)) = (args.lexicon_path, args.mutator_path) + { + let acceptor = HfstTransducer::from_path(&Fs, lexicon_path)?; + let errmodel = HfstTransducer::from_path(&Fs, mutator_path)?; + HfstSpeller::new(errmodel, acceptor) as _ + } else { + eprintln!("Either a BHFST or ZHFST archive must be provided, or a mutator and lexicon."); + process::exit(1); + }; + // 3. config from explicit config file if let Some(config_path) = args.config { let config_file = std::fs::File::open(config_path)?; let config: SpellerConfig = serde_json::from_reader(config_file)?; suggest_cfg = config; } - + // 4. config from other command line stuff if args.disable_reweight { suggest_cfg.reweight = None; } @@ -425,20 +446,6 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { args.inputs.into_iter().collect() }; - let speller = if let Some(archive_path) = args.archive_path { - let archive = load_archive(&archive_path)?; - let speller = archive.analyser(); - speller - } else if let (Some(lexicon_path), Some(mutator_path)) = (args.lexicon_path, args.mutator_path) - { - let acceptor = HfstTransducer::from_path(&Fs, lexicon_path)?; - let errmodel = HfstTransducer::from_path(&Fs, mutator_path)?; - - HfstSpeller::new(errmodel, acceptor) as _ - } else { - eprintln!("Either a BHFST or ZHFST archive must be provided, or a mutator and lexicon."); - process::exit(1); - }; run( speller, diff --git a/divvunspell/src/archive/meta.rs b/divvunspell/src/archive/meta.rs index 72f7a65..afbe290 100644 --- a/divvunspell/src/archive/meta.rs +++ b/divvunspell/src/archive/meta.rs @@ -1,48 +1,78 @@ -//! Archive metadata handling +//! Data structures of speller metadata. +//! +//! These are usually read from the speller archives, in xml or json files or +//! such. XML format is described here and json format there. use serde::{Deserialize, Serialize}; use serde_xml_rs::{from_reader, Error, ParserConfig}; +/// Speller metadata #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadata { + /// speller info pub info: SpellerMetadataInfo, + /// acceptor metadata pub acceptor: SpellerMetadataAcceptor, + /// error model metadata pub errmodel: SpellerMetadataErrmodel, } +/// Predictor metadata #[derive(Serialize, Deserialize, Debug, Default, Clone)] pub struct PredictorMetadata { + /// whether speller is #[serde(default)] pub speller: bool, } +/// localised speller title #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerTitle { + /// ISO 639 code of the title's content language pub lang: Option, + /// translated title #[serde(rename = "$value")] pub value: String, } +/// Speller metadata #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadataInfo { + /// ISO-639 code of speller language pub locale: String, + /// localised, human readable titles of speller pub title: Vec, + /// human readable description of speller pub description: String, + /// creator and copyright owner of the speller pub producer: String, } +/// Acceptor metadata #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadataAcceptor { + /// acceptor type: + /// - `blah` if normal dictionary automaton + /// - `foo` if analyser #[serde(rename = "type", default)] pub type_: String, + /// locally unique id for this acceptor pub id: String, + /// localised human readable titles of speller pub title: Vec, + /// human readable description of the acceptor pub description: String, + /// marker for incomplete strings + pub continuation: Option, } +/// Error model metadata #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadataErrmodel { + /// locally unique id for the error model pub id: String, + /// localised human readable titles for the error model pub title: Vec, + /// human readable description of the error model pub description: String, } From 01f25691e301609ee319879688dcd8aeaf6dd81a Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Thu, 22 May 2025 17:47:54 +0200 Subject: [PATCH 15/21] I cannot run tesI cannot run test --- divvunspell/src/paths.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/divvunspell/src/paths.rs b/divvunspell/src/paths.rs index ed84a3b..662cdde 100644 --- a/divvunspell/src/paths.rs +++ b/divvunspell/src/paths.rs @@ -3,11 +3,15 @@ use std::path::PathBuf; #[cfg(target_os = "windows")] use std::path::PathBuf; +#[cfg(target_os = "linux")] +use std::path::PathBuf; #[cfg(target_os = "macos")] use language_tags::LanguageTag; #[cfg(target_os = "windows")] use language_tags::LanguageTag; +#[cfg(target_os = "linux")] +use language_tags::LanguageTag; #[cfg(target_os = "macos")] pub fn find_speller_path(tag: LanguageTag) -> Option { @@ -48,3 +52,8 @@ pub fn find_speller_path(tag: LanguageTag) -> Option { .next() .map(|x| x.path().to_path_buf()) } + +#[cfg(target_os = "linux")] +pub fn find_speller_path(tag: LanguageTag) -> Option { + None +} From b9b1caee6edac3a79c6e9416df6dbe71b6e22404 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Thu, 22 May 2025 17:48:21 +0200 Subject: [PATCH 16/21] doens't build with such path --- Cargo.lock | 158 ++++++++++++++++------------------------- divvunspell/Cargo.toml | 2 +- 2 files changed, 64 insertions(+), 96 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a197de..e09cf7a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,18 +55,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "ahash" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b79b82693f705137f8fb9b37871d99e4f9a7df12b917eed79c3d3954830a60b" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -91,12 +79,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -188,15 +170,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "autocfg" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78" -dependencies = [ - "autocfg 1.4.0", -] - [[package]] name = "autocfg" version = "1.4.0" @@ -261,7 +234,7 @@ dependencies = [ "comde", "fastvlq", "log", - "memmap2 0.9.5", + "memmap2", "pathdiff", "relative-path", "serde_json", @@ -369,6 +342,7 @@ version = "1.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" dependencies = [ + "jobserver", "libc", "shlex", ] @@ -376,6 +350,7 @@ dependencies = [ [[package]] name = "cffi" version = "0.2.0-dev" +source = "git+https://github.com/cffi-rs/cffi#ee4a9f5a5bcf72164831650b23d9dc0d5618a04e" dependencies = [ "cffi-impl", "libc", @@ -385,17 +360,18 @@ dependencies = [ [[package]] name = "cffi-impl" version = "0.2.0-dev" +source = "git+https://github.com/cffi-rs/cffi#ee4a9f5a5bcf72164831650b23d9dc0d5618a04e" dependencies = [ "ctor", "darling", - "heck 0.3.3", + "heck 0.4.1", "log", "phf", "phf_codegen", "pretty_env_logger", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.101", ] [[package]] @@ -483,12 +459,6 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" -[[package]] -name = "cloudabi" -version = "0.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - [[package]] name = "colorchoice" version = "1.0.3" @@ -634,7 +604,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" dependencies = [ "quote", - "syn 2.0.52", + "syn 2.0.101", ] [[package]] @@ -688,7 +658,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.52", + "syn 2.0.101", ] [[package]] @@ -699,7 +669,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 2.0.52", + "syn 2.0.101", ] [[package]] @@ -782,13 +752,13 @@ dependencies = [ "flatbuffers", "fs_extra", "globwalk", - "hashbrown 0.14.3", + "hashbrown 0.11.2", "itertools 0.12.1", "language-tags", "libc", "lifeguard", "log", - "memmap2 0.5.10", + "memmap2", "parking_lot", "pathos", "rust-bert", @@ -797,7 +767,7 @@ dependencies = [ "serde-xml-rs", "serde_json", "smol_str", - "strsim 0.11.0", + "strsim 0.11.1", "tch", "tempfile", "thiserror", @@ -880,8 +850,10 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d" dependencies = [ - "atty", - "humantime 2.2.0", + "anstream", + "anstyle", + "env_filter", + "humantime", "log", ] @@ -1114,7 +1086,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.9.0", "ignore", "walkdir", ] @@ -1180,7 +1152,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash 0.7.8", + "ahash", ] [[package]] @@ -1198,6 +1170,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1219,6 +1197,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" + [[package]] name = "hmac" version = "0.12.1" @@ -1262,15 +1246,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] - [[package]] name = "humantime" version = "2.2.0" @@ -1568,6 +1543,17 @@ dependencies = [ "smallvec", ] +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi 0.5.1", + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1704,7 +1690,7 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ - "autocfg 1.4.0", + "autocfg", "scopeguard", ] @@ -1732,7 +1718,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" dependencies = [ - "autocfg 1.4.0", + "autocfg", "rawpointer", ] @@ -1742,15 +1728,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - [[package]] name = "memmap2" version = "0.9.5" @@ -1846,7 +1823,7 @@ version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "autocfg 1.4.0", + "autocfg", ] [[package]] @@ -1947,25 +1924,27 @@ checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ + "instant", "lock_api", "parking_lot_core", ] [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" dependencies = [ "cfg-if", + "instant", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", - "windows-targets 0.48.5", + "winapi", ] [[package]] @@ -1975,7 +1954,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -2221,6 +2200,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.5.12" @@ -2547,7 +2535,7 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ - "autocfg 1.4.0", + "autocfg", ] [[package]] @@ -3757,26 +3745,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "zerocopy" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.52", -] - [[package]] name = "zip" version = "0.5.13" diff --git a/divvunspell/Cargo.toml b/divvunspell/Cargo.toml index b8a914f..3dbf2e3 100644 --- a/divvunspell/Cargo.toml +++ b/divvunspell/Cargo.toml @@ -32,7 +32,7 @@ box-format = { version = "0.3.2", features = ["reader"], default-features = fals itertools = "0.12.1" strsim = "0.11.0" log = "0.4.11" -cffi = { path = "../../../github/cffi", optional = true } #{ git = "https://github.com/cffi-rs/cffi", optional = true } +cffi = { git = "https://github.com/cffi-rs/cffi", optional = true } unic-ucd-common = "0.9.0" flatbuffers = { version = "0.6.1", optional = true } env_logger = { version = "0.11.2", optional = true } From 953e674d28c9d39b1e97821dca90872d329b5de9 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Mon, 2 Jun 2025 19:18:28 +0200 Subject: [PATCH 17/21] analysing speller example --- divvunspell-bin/src/main.rs | 5 ++++ divvunspell/src/speller/mod.rs | 38 +++++++++++++++++++++++++++++++ divvunspell/src/speller/worker.rs | 18 +++++++-------- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 0daaa22..14a34dd 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -186,6 +186,11 @@ fn run( .clone() .analyze_output_with_config(&word, &suggest_cfg); writer.write_output_analyses(&word, &output_analyses); + + let final_suggs = speller + .clone() + .analyse_suggest_with_config(&word, &suggest_cfg); + writer.write_suggestions(&word, &final_suggs); } } } diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 41e64d1..6063719 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -159,6 +159,14 @@ pub trait Analyzer: Speller { word: &str, config: &SpellerConfig, ) -> Vec; + /// create suggestion list and use their analyses for finetununt + fn analyse_suggest(self: Arc, word: &str) -> Vec; + /// create suggestion list and use analyses to finetune with config + fn analyse_suggest_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec; } impl Speller for HfstSpeller @@ -265,6 +273,11 @@ where self.analyze_output_with_config(word, &SpellerConfig::default()) } + #[inline] + fn analyse_suggest(self: Arc, word: &str) -> Vec { + self.analyse_suggest_with_config(word, &SpellerConfig::default()) + } + fn analyze_output_with_config( self: Arc, word: &str, @@ -279,6 +292,31 @@ where worker.suggest() } + + fn analyse_suggest_with_config( + self: Arc, + word: &str, + config: &SpellerConfig + ) -> Vec { + let mut suggs = self.clone().suggest_with_config(word, config); + suggs.retain(|sugg| { + log::trace!("suggestion {}", sugg.value); + let analyses = self.clone().analyze_input_with_config(sugg.value.as_str(), + config); + let mut all_filtered = true; + for analysis in analyses { + log::trace!("-> {}", analysis.value); + if !analysis.value.contains("+Spell/NoSugg") { + all_filtered = false; + } else { + log::trace!("filtering=?"); + } + } + !all_filtered + }); + suggs + } + } /// a speller consisting of two HFST automata diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index 428d24b..df6edd7 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -419,14 +419,14 @@ where let input_sym = alphabet_translator[self.input[input_state as usize] as usize]; let next_lexicon_state = next_node.lexicon_state + 1; - log::trace!( - "lexicon consuming {}: {}", - input_sym, - self.speller - .lexicon - .alphabet() - .string_from_symbols(&[input_sym]) - ); + // log::trace!( + // "lexicon consuming {}: {}", + // input_sym, + // self.speller + // .lexicon + // .alphabet() + // .string_from_symbols(&[input_sym]) + // ); if !lexicon.has_transitions(next_lexicon_state, Some(input_sym)) { // we have no regular transitions for this @@ -655,7 +655,7 @@ where &self, corrections: &HashMap, ) -> Vec { - log::trace!("Generating sorted suggestions"); + //log::trace!("Generating sorted suggestions"); let mut c: Vec; if let Some(s) = &self.config.continuation_marker { c = corrections From 5e00edaac7184294e3b9a6b433c2c4ad951b3ca6 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 9 Oct 2025 18:12:05 +0200 Subject: [PATCH 18/21] Refactor API for public release - Remove predictor functionality since it is unmaintained and non-functional - Organise API into what should be public - Merge analysis and speller traits --- Cargo.lock | 2303 +++-------------- README.md | 12 - divvunspell-bin/Cargo.toml | 4 - divvunspell-bin/src/main.rs | 131 +- divvunspell/Cargo.toml | 4 - divvunspell/src/archive/boxf.rs | 72 +- divvunspell/src/archive/error.rs | 40 +- divvunspell/src/archive/meta.rs | 172 +- divvunspell/src/archive/mod.rs | 32 +- divvunspell/src/archive/zip.rs | 10 +- divvunspell/src/constants.rs | 6 +- divvunspell/src/lib.rs | 13 +- divvunspell/src/predictor/gpt2.rs | 58 - divvunspell/src/predictor/mod.rs | 9 - divvunspell/src/speller/mod.rs | 202 +- divvunspell/src/speller/worker.rs | 55 +- divvunspell/src/tokenizer/mod.rs | 8 +- divvunspell/src/transducer/convert.rs | 4 +- divvunspell/src/transducer/mod.rs | 10 +- .../src/transducer/symbol_transition.rs | 12 +- divvunspell/src/transducer/tree_node.rs | 14 +- docs/src/divvunspell/archive/meta.rs.html | 2 +- thfst-tools/src/main.rs | 6 +- 23 files changed, 763 insertions(+), 2416 deletions(-) delete mode 100644 divvunspell/src/predictor/gpt2.rs delete mode 100644 divvunspell/src/predictor/mod.rs diff --git a/Cargo.lock b/Cargo.lock index e09cf7a..998346d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,7 +10,7 @@ dependencies = [ "csv", "distance", "divvunspell", - "indicatif 0.15.0", + "indicatif", "pretty_env_logger", "rayon", "serde", @@ -18,31 +18,11 @@ dependencies = [ "structopt", ] -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" - -[[package]] -name = "aes" -version = "0.8.4" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures", -] +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "ahash" @@ -79,12 +59,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -105,9 +79,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -120,44 +94,44 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.7" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" dependencies = [ "anstyle", - "once_cell", - "windows-sys 0.59.0", + "once_cell_polyfill", + "windows-sys 0.60.2", ] [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "atty" @@ -172,36 +146,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - -[[package]] -name = "backtrace" -version = "0.3.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64ct" -version = "1.7.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" @@ -211,18 +158,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" - -[[package]] -name = "block-buffer" -version = "0.10.4" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "box-format" @@ -277,9 +215,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "byteorder" @@ -287,61 +225,13 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "bytes" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - -[[package]] -name = "cached-path" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f1c56d30236522ab3393a08746b138d4e16372001f42d29c88d513aeb8ab7ef" -dependencies = [ - "flate2", - "fs2", - "glob", - "indicatif 0.16.2", - "log", - "rand", - "reqwest", - "serde", - "serde_json", - "sha2", - "tar", - "tempfile", - "thiserror", - "zip 0.5.13", - "zip-extensions", -] - [[package]] name = "cc" -version = "1.2.21" +version = "1.2.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" +checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -371,22 +261,21 @@ dependencies = [ "pretty_env_logger", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", @@ -394,16 +283,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "cipher" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" -dependencies = [ - "crypto-common", - "inout", -] - [[package]] name = "clap" version = "2.34.0" @@ -421,9 +300,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.37" +version = "4.5.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" +checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" dependencies = [ "clap_builder", "clap_derive", @@ -431,9 +310,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.37" +version = "4.5.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" +checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" dependencies = [ "anstream", "anstyle", @@ -443,27 +322,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.32" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comde" @@ -476,36 +355,20 @@ dependencies = [ "flate2", "liblzma", "snap", - "zstd 0.13.3", + "zstd", ] [[package]] name = "console" -version = "0.15.11" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width 0.2.0", - "windows-sys 0.59.0", -] - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", + "unicode-width 0.2.2", + "windows-sys 0.61.2", ] [[package]] @@ -523,20 +386,11 @@ dependencies = [ "memchr", ] -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -566,16 +420,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" version = "1.3.1" @@ -599,49 +443,19 @@ dependencies = [ [[package]] name = "ctor" -version = "0.2.7" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 2.0.101", -] - -[[package]] -name = "curl" -version = "0.4.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9fb4d13a1be2b58f14d60adba57c9834b78c62fd86c3e76a148f732686e9265" -dependencies = [ - "curl-sys", - "libc", - "openssl-probe", - "openssl-sys", - "schannel", - "socket2", - "windows-sys 0.52.0", -] - -[[package]] -name = "curl-sys" -version = "0.4.80+curl-8.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55f7df2eac63200c3ab25bde3b2268ef2ee56af3d238e76d61f01c3c49bff734" -dependencies = [ - "cc", - "libc", - "libz-sys", - "openssl-sys", - "pkg-config", - "vcpkg", - "windows-sys 0.52.0", + "syn 2.0.106", ] [[package]] name = "darling" -version = "0.20.8" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ "darling_core", "darling_macro", @@ -649,27 +463,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.8" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", - "strsim 0.10.0", - "syn 2.0.101", + "strsim 0.11.1", + "syn 2.0.106", ] [[package]] name = "darling_macro" -version = "0.20.8" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] @@ -680,58 +494,7 @@ checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", -] - -[[package]] -name = "deranged" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] @@ -748,12 +511,12 @@ dependencies = [ "byteorder", "cffi", "eieio", - "env_logger 0.11.2", + "env_logger 0.11.8", "flatbuffers", "fs_extra", "globwalk", - "hashbrown 0.11.2", - "itertools 0.12.1", + "hashbrown", + "itertools", "language-tags", "libc", "lifeguard", @@ -761,14 +524,11 @@ dependencies = [ "memmap2", "parking_lot", "pathos", - "rust-bert", - "rust_tokenizers", "serde", "serde-xml-rs", "serde_json", "smol_str", "strsim 0.11.1", - "tch", "tempfile", "thiserror", "unic-char-property", @@ -777,7 +537,7 @@ dependencies = [ "unic-segment", "unic-ucd-category", "unic-ucd-common", - "zip 0.5.13", + "zip", ] [[package]] @@ -812,20 +572,11 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - [[package]] name = "env_filter" -version = "0.1.0" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ "log", "regex", @@ -846,31 +597,25 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.2" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ "anstream", "anstyle", "env_filter", - "humantime", + "jiff", "log", ] -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - [[package]] name = "errno" -version = "0.3.11" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -889,16 +634,10 @@ dependencies = [ ] [[package]] -name = "filetime" -version = "0.2.25" +name = "find-msvc-tools" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" -dependencies = [ - "cfg-if", - "libc", - "libredox", - "windows-sys 0.59.0", -] +checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" [[package]] name = "flatbuffers" @@ -911,9 +650,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.1" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" dependencies = [ "crc32fast", "miniz_oxide", @@ -925,30 +664,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - [[package]] name = "fruity__bbqsrc" version = "0.2.0" @@ -958,80 +673,12 @@ dependencies = [ "malloced", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "fs_extra" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "futures-channel" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = "futures-io" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" - -[[package]] -name = "futures-sink" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" - -[[package]] -name = "futures-task" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" - -[[package]] -name = "futures-util" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" -dependencies = [ - "futures-core", - "futures-io", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.16" @@ -1040,33 +687,21 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", ] [[package]] name = "getrandom" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasi 0.14.7+wasi-0.2.4", ] -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - -[[package]] -name = "glob" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" - [[package]] name = "globset" version = "0.4.16" @@ -1086,7 +721,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "ignore", "walkdir", ] @@ -1111,31 +746,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "half" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" - [[package]] name = "hashbrown" version = "0.11.2" @@ -1146,21 +756,6 @@ dependencies = [ "serde", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashbrown" -version = "0.15.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" - [[package]] name = "heck" version = "0.3.3" @@ -1193,107 +788,21 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.9" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hermit-abi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "humantime" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" - -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper-tls" -version = "0.5.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" -dependencies = [ - "bytes", - "hyper", - "native-tls", - "tokio", - "tokio-native-tls", -] +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1313,151 +822,12 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" - -[[package]] -name = "icu_normalizer" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" - -[[package]] -name = "icu_properties" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" - -[[package]] -name = "icu_provider" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - [[package]] name = "ignore" version = "0.4.23" @@ -1474,16 +844,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "indexmap" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" -dependencies = [ - "equivalent", - "hashbrown 0.15.3", -] - [[package]] name = "indicatif" version = "0.15.0" @@ -1492,32 +852,11 @@ checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4" dependencies = [ "console", "lazy_static", - "number_prefix 0.3.0", + "number_prefix", "rayon", "regex", ] -[[package]] -name = "indicatif" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" -dependencies = [ - "console", - "lazy_static", - "number_prefix 0.4.0", - "regex", -] - -[[package]] -name = "inout" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" -dependencies = [ - "generic-array", -] - [[package]] name = "instant" version = "0.1.13" @@ -1527,12 +866,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "ipnet" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" - [[package]] name = "iref" version = "1.4.3" @@ -1549,7 +882,7 @@ version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ - "hermit-abi 0.5.1", + "hermit-abi 0.5.2", "libc", "windows-sys 0.59.0", ] @@ -1560,15 +893,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.1" @@ -1584,21 +908,45 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "jobserver" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.2", + "getrandom 0.3.3", "libc", ] [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" dependencies = [ "once_cell", "wasm-bindgen", @@ -1618,15 +966,15 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "liblzma" -version = "0.4.1" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66352d7a8ac12d4877b6e6ea5a9b7650ee094257dc40889955bea5bc5b08c1d0" +checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" dependencies = [ "liblzma-sys", "num_cpus", @@ -1634,36 +982,13 @@ dependencies = [ [[package]] name = "liblzma-sys" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5839bad90c3cc2e0b8c4ed8296b80e86040240f81d46b9c0e9bc8dd51ddd3af1" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags 2.9.0", - "libc", - "redox_syscall 0.5.12", -] - -[[package]] -name = "libz-sys" -version = "1.1.22" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" dependencies = [ "cc", "libc", "pkg-config", - "vcpkg", ] [[package]] @@ -1674,31 +999,24 @@ checksum = "89be94dbd775db37b46ca4f4bf5cf89adfb13ba197bfbcb69b2122848ee73c26" [[package]] name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - -[[package]] -name = "litemap" -version = "0.7.5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "malloced" @@ -1712,263 +1030,110 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" -[[package]] -name = "matrixmultiply" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" -dependencies = [ - "autocfg", - "rawpointer", -] - [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memmap2" -version = "0.9.5" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" dependencies = [ "libc", ] -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - [[package]] name = "miniz_oxide" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] -name = "mio" -version = "1.0.3" +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "autocfg", ] [[package]] -name = "native-tls" -version = "0.2.14" +name = "num_cpus" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ + "hermit-abi 0.5.2", "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", ] [[package]] -name = "ndarray" -version = "0.15.6" +name = "number_prefix" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] +checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" [[package]] -name = "num-complex" -version = "0.4.6" +name = "once_cell" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] -name = "num-conv" -version = "0.1.0" +name = "once_cell_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" [[package]] -name = "num-integer" -version = "0.1.46" +name = "os_str_bytes" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] +checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" [[package]] -name = "num-traits" -version = "0.2.19" +name = "parking_lot" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ - "autocfg", + "instant", + "lock_api", + "parking_lot_core", ] [[package]] -name = "num_cpus" -version = "1.16.0" +name = "parking_lot_core" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" dependencies = [ - "hermit-abi 0.3.9", + "cfg-if", + "instant", "libc", + "redox_syscall", + "smallvec", + "winapi", ] [[package]] -name = "number_prefix" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" - -[[package]] -name = "number_prefix" -version = "0.4.0" +name = "pathdiff" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" [[package]] -name = "object" -version = "0.36.7" +name = "pathos" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "openssl" -version = "0.10.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" -dependencies = [ - "bitflags 2.9.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - -[[package]] -name = "openssl-sys" -version = "0.9.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "os_str_bytes" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" - -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", -] - -[[package]] -name = "password-hash" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" -dependencies = [ - "base64ct", - "rand_core", - "subtle", -] - -[[package]] -name = "pathdiff" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" - -[[package]] -name = "pathos" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afe589db0b3935512cc9f3dede41a52d5555a83ed4f82c86882757fd5f9a184a" +checksum = "afe589db0b3935512cc9f3dede41a52d5555a83ed4f82c86882757fd5f9a184a" dependencies = [ "eieio", "fruity__bbqsrc", @@ -1982,18 +1147,6 @@ dependencies = [ "windirs", ] -[[package]] -name = "pbkdf2" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" -dependencies = [ - "digest", - "hmac", - "password-hash", - "sha2", -] - [[package]] name = "pct-str" version = "1.2.0" @@ -2005,24 +1158,24 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "phf" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ "phf_generator", "phf_shared", @@ -2030,9 +1183,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", "rand", @@ -2040,9 +1193,9 @@ dependencies = [ [[package]] name = "phf_shared" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ "siphasher", ] @@ -2053,12 +1206,6 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "pkg-config" version = "0.3.32" @@ -2066,18 +1213,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] -name = "powerfmt" -version = "0.2.0" +name = "portable-atomic" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] -name = "ppv-lite86" -version = "0.2.21" +name = "portable-atomic-util" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" dependencies = [ - "zerocopy", + "portable-atomic", ] [[package]] @@ -2116,33 +1263,27 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] -[[package]] -name = "protobuf" -version = "2.27.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" - [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" @@ -2150,18 +1291,6 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", "rand_core", ] @@ -2170,21 +1299,12 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" dependencies = [ "either", "rayon-core", @@ -2192,9 +1312,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -2209,31 +1329,11 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_syscall" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" -dependencies = [ - "bitflags 2.9.0", -] - -[[package]] -name = "redox_users" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" -dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror", -] - [[package]] name = "regex" -version = "1.11.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" dependencies = [ "aho-corasick", "memchr", @@ -2243,9 +1343,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" dependencies = [ "aho-corasick", "memchr", @@ -2254,15 +1354,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "regtest" version = "0.1.0" dependencies = [ - "clap 4.5.37", + "clap 4.5.48", "csv", "divvunspell", ] @@ -2273,118 +1373,24 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" -[[package]] -name = "reqwest" -version = "0.11.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" -dependencies = [ - "base64", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-tls", - "ipnet", - "js-sys", - "log", - "mime", - "native-tls", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "system-configuration", - "tokio", - "tokio-native-tls", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "winreg", -] - -[[package]] -name = "rust-bert" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f74d53d058c1478224f08146fb1af6e4466660464a323fb957c2105728c62bf" -dependencies = [ - "cached-path", - "dirs", - "half", - "lazy_static", - "ordered-float", - "rust_tokenizers", - "serde", - "serde_json", - "tch", - "thiserror", - "uuid", -] - -[[package]] -name = "rust_tokenizers" -version = "7.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196e3b77b07fd5bfcbc8187ecaef5d5931820d9abd6c3fe0a9dc6d3ddb035d72" -dependencies = [ - "csv", - "hashbrown 0.12.3", - "itertools 0.10.5", - "lazy_static", - "protobuf", - "rayon", - "regex", - "serde", - "serde_json", - "thiserror", - "unicode-normalization", - "unicode-normalization-alignments", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - [[package]] name = "rustix" -version = "1.0.7" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64", + "windows-sys 0.61.2", ] [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" @@ -2401,50 +1407,19 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "schannel" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags 2.9.0", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -2461,60 +1436,36 @@ dependencies = [ ] [[package]] -name = "serde_derive" -version = "1.0.219" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "serde_derive", ] [[package]] -name = "serde_json" -version = "1.0.140" +name = "serde_derive" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "serde_urlencoded" -version = "0.7.1" +name = "serde_json" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "form_urlencoded", "itoa", + "memchr", "ryu", "serde", -] - -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "serde_core", ] [[package]] @@ -2524,31 +1475,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "siphasher" -version = "0.3.11" +name = "simd-adler32" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] -name = "slab" -version = "0.4.9" +name = "siphasher" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "smol_str" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49" +checksum = "dd538fb6910ac1099850255cf94a94df6551fbdd602454387d0adb2d1ca6dead" dependencies = [ "serde", ] @@ -2559,34 +1507,12 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" -[[package]] -name = "socket2" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - [[package]] name = "strsim" version = "0.11.1" @@ -2617,12 +1543,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - [[package]] name = "syn" version = "1.0.109" @@ -2636,9 +1556,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.101" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -2646,244 +1566,67 @@ dependencies = [ ] [[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "tar" -version = "0.4.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" -dependencies = [ - "filetime", - "libc", - "xattr", -] - -[[package]] -name = "tch" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b73f876b186599e22b01fa6ebfeea2dee2f11e8083463ab3572933d8201436b" -dependencies = [ - "half", - "lazy_static", - "libc", - "ndarray", - "rand", - "thiserror", - "torch-sys", - "zip 0.5.13", -] - -[[package]] -name = "tempfile" -version = "3.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" -dependencies = [ - "fastrand", - "getrandom 0.3.2", - "once_cell", - "rustix", - "windows-sys 0.59.0", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width 0.1.14", -] - -[[package]] -name = "thfst-tools" -version = "1.0.0-beta.3" -dependencies = [ - "box-format", - "divvunspell", - "serde_json", - "structopt", - "tempfile", -] - -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" - -[[package]] -name = "tinystr" -version = "0.7.6" +name = "tempfile" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "displaydoc", - "zerovec", + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.61.2", ] [[package]] -name = "tinyvec" -version = "1.9.0" +name = "termcolor" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ - "tinyvec_macros", + "winapi-util", ] [[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.44.2" +name = "textwrap" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "pin-project-lite", - "socket2", - "windows-sys 0.52.0", + "unicode-width 0.1.14", ] [[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +name = "thfst-tools" +version = "1.0.0-beta.3" dependencies = [ - "native-tls", - "tokio", + "box-format", + "divvunspell", + "serde_json", + "structopt", + "tempfile", ] [[package]] -name = "tokio-util" -version = "0.7.15" +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", + "thiserror-impl", ] [[package]] -name = "torch-sys" -version = "0.6.1" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cc0f21b1aad5d71d529e9fe4dbbbdbf53918d7b4bde946f523839aa32cffae" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "anyhow", - "cc", - "curl", - "libc", - "zip 0.5.13", + "proc-macro2", + "quote", + "syn 2.0.106", ] -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - [[package]] name = "tracing" version = "0.1.41" @@ -2897,36 +1640,24 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", ] -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - [[package]] name = "unic-char-property" version = "0.9.0" @@ -3141,27 +1872,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-normalization-alignments" -version = "0.1.12" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" -dependencies = [ - "smallvec", -] +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "unicode-segmentation" @@ -3177,26 +1890,9 @@ checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-width" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" - -[[package]] -name = "url" -version = "2.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "utf16_iter" -version = "1.0.5" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "utf8-decode" @@ -3204,33 +1900,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498" -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.16", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "vec_map" version = "0.8.2" @@ -3253,80 +1928,62 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.14.7+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", + "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" -dependencies = [ - "cfg-if", - "js-sys", - "once_cell", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3334,36 +1991,26 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" dependencies = [ "unicode-ident", ] -[[package]] -name = "web-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" @@ -3382,11 +2029,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3406,9 +2053,9 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.0" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", @@ -3419,90 +2066,75 @@ dependencies = [ [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "windows-link" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ "windows-link", ] [[package]] name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets 0.52.6", ] [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.53.5", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-link", ] [[package]] @@ -3514,7 +2146,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -3522,10 +2154,21 @@ dependencies = [ ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" +name = "windows-targets" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] [[package]] name = "windows_aarch64_gnullvm" @@ -3534,10 +2177,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" +name = "windows_aarch64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -3546,10 +2189,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_i686_gnu" -version = "0.48.5" +name = "windows_aarch64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -3557,6 +2200,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -3564,10 +2213,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_msvc" -version = "0.48.5" +name = "windows_i686_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -3576,10 +2225,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" +name = "windows_i686_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -3588,10 +2237,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" +name = "windows_x86_64_gnu" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -3600,10 +2249,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "windows_x86_64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -3612,138 +2261,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags 2.9.0", -] - -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - -[[package]] -name = "writeable" -version = "0.5.5" +name = "windows_x86_64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] -name = "xattr" -version = "1.5.0" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" -dependencies = [ - "libc", - "rustix", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "xml-rs" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda" - -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", - "synstructure", -] - -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.3" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] +checksum = "6fd8403733700263c6eb89f192880191f1b83e332f7a20371ddcf421c4a337c7" [[package]] name = "zip" @@ -3752,49 +2285,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" dependencies = [ "byteorder", - "bzip2", "crc32fast", "flate2", "thiserror", - "time 0.1.45", -] - -[[package]] -name = "zip" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" -dependencies = [ - "aes", - "byteorder", - "bzip2", - "constant_time_eq", - "crc32fast", - "crossbeam-utils", - "flate2", - "hmac", - "pbkdf2", - "sha1", - "time 0.3.41", - "zstd 0.11.2+zstd.1.5.2", -] - -[[package]] -name = "zip-extensions" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecf62554c4ff96bce01a7ef123d160c3ffe9180638820f8b4d545c65b221b8c" -dependencies = [ - "zip 0.6.6", -] - -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", ] [[package]] @@ -3803,17 +2296,7 @@ version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ - "zstd-safe 7.2.4", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] @@ -3827,9 +2310,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/README.md b/README.md index afec627..046b4be 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,6 @@ cargo install thfst-tools cargo install --path . ``` -### Building with `gpt2` support on macOS aarch64 - -(Skip this if you are not experimenting with gpt2 support. So skip. Now.) - -Clone this repo then: - -```bash -brew install libtorch -LIBTORCH=/opt/homebrew/opt/libtorch cargo build --features gpt2 --bin divvunspell -``` - ### No Rust? ```sh @@ -49,7 +38,6 @@ Optional arguments: Available subcommands: suggest get suggestions for provided input tokenize print input in word-separated tokenized form - predict predict next words using GPT2 model $ divvunspell suggest -h Usage: divvunspell suggest [OPTIONS] diff --git a/divvunspell-bin/Cargo.toml b/divvunspell-bin/Cargo.toml index 8a997ab..28403a3 100644 --- a/divvunspell-bin/Cargo.toml +++ b/divvunspell-bin/Cargo.toml @@ -20,7 +20,3 @@ pretty_env_logger = "0.5.0" gumdrop = "0.8.0" anyhow = "1.0.32" structopt = "0.3.17" - -[features] -default = [] -gpt2 = ["divvunspell/gpt2"] diff --git a/divvunspell-bin/src/main.rs b/divvunspell-bin/src/main.rs index 14a34dd..7ca1c25 100644 --- a/divvunspell-bin/src/main.rs +++ b/divvunspell-bin/src/main.rs @@ -12,17 +12,12 @@ use divvunspell::vfs::Fs; use gumdrop::Options; use serde::Serialize; -#[cfg(feature = "gpt2")] -use divvunspell::archive::{ - boxf::BoxGpt2PredictorArchive, error::PredictorArchiveError, PredictorArchive, -}; - use divvunspell::{ archive::{ boxf::ThfstBoxSpellerArchive, error::SpellerArchiveError, BoxSpellerArchive, SpellerArchive, ZipSpellerArchive, }, - speller::{suggestion::Suggestion, Analyzer, SpellerConfig}, + speller::{suggestion::Suggestion, Speller, SpellerConfig}, tokenizer::Tokenize, }; @@ -31,7 +26,6 @@ trait OutputWriter { fn write_suggestions(&mut self, word: &str, suggestions: &[Suggestion]); fn write_input_analyses(&mut self, word: &str, analyses: &[Suggestion]); fn write_output_analyses(&mut self, word: &str, analyses: &[Suggestion]); - fn write_predictions(&mut self, predictions: &[String]); fn finish(&mut self); } @@ -65,11 +59,6 @@ impl OutputWriter for StdoutWriter { println!(); } - fn write_predictions(&mut self, predictions: &[String]) { - println!("Predictions: "); - println!("{}", predictions.join(" ")); - } - fn write_input_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { println!("Input analyses: "); for sugg in suggestions { @@ -108,8 +97,6 @@ struct JsonWriter { #[serde(skip_serializing_if = "Vec::is_empty")] suggest: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] - predict: Vec, - #[serde(skip_serializing_if = "Vec::is_empty")] input_analysis: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] output_analysis: Vec, @@ -135,10 +122,6 @@ impl OutputWriter for JsonWriter { self.suggest[i].suggestions = suggestions.to_vec(); } - fn write_predictions(&mut self, predictions: &[String]) { - self.predict = predictions.to_vec(); - } - fn write_input_analyses(&mut self, word: &str, suggestions: &[Suggestion]) { self.input_analysis.push(AnalysisRequest { word: word.to_string(), @@ -159,7 +142,7 @@ impl OutputWriter for JsonWriter { } fn run( - speller: Arc, + speller: Arc, words: Vec, writer: &mut dyn OutputWriter, is_analyzing: bool, @@ -189,7 +172,7 @@ fn run( let final_suggs = speller .clone() - .analyse_suggest_with_config(&word, &suggest_cfg); + .analyze_suggest_with_config(&word, &suggest_cfg); writer.write_suggestions(&word, &final_suggs); } } @@ -210,9 +193,6 @@ enum Command { #[options(help = "print input in word-separated tokenized form")] Tokenize(TokenizeArgs), - - #[options(help = "predict next words using GPT2 model")] - Predict(PredictArgs), } #[derive(Debug, Options)] @@ -241,7 +221,7 @@ struct SuggestArgs { #[options(help = "maximum number of results")] nbest: Option, - #[options(help = "character for incomplete predictions")] + #[options(help = "character for incomplete suggestions")] continuation_marker: Option, #[options( @@ -280,31 +260,6 @@ struct TokenizeArgs { inputs: Vec, } -#[derive(Debug, Options)] -struct PredictArgs { - #[options(help = "print help message")] - help: bool, - - #[options(help = "BHFST archive to be used", required)] - archive: PathBuf, - - #[options( - short = "n", - long = "name", - help = "Predictor name to use (default: gpt2_predictor)" - )] - predictor_name: Option, - - #[options(help = "whether suggestions should not be validated against a speller")] - disable_spelling_validation: bool, - - #[options(no_short, long = "json", help = "output in JSON format")] - use_json: bool, - - #[options(free, help = "text to be tokenized")] - inputs: Vec, -} - fn tokenize(args: TokenizeArgs) -> anyhow::Result<()> { let inputs: String = if args.inputs.is_empty() { eprintln!("Reading from stdin..."); @@ -383,11 +338,11 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { let archive = load_archive(&archive_path)?; // 2. config from metadata if let Some(metadata) = archive.metadata() { - if let Some(continuation) = &metadata.acceptor.continuation { - suggest_cfg.continuation_marker = Some(continuation.clone()); + if let Some(continuation) = metadata.acceptor().continuation() { + suggest_cfg.continuation_marker = Some(continuation.to_string()); } } - let speller = archive.analyser(); + let speller = archive.speller(); speller } else if let (Some(lexicon_path), Some(mutator_path)) = (args.lexicon_path, args.mutator_path) { @@ -451,7 +406,6 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { args.inputs.into_iter().collect() }; - run( speller, words, @@ -467,76 +421,6 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { Ok(()) } -#[cfg(feature = "gpt2")] -fn load_predictor_archive( - path: &Path, - name: Option<&str>, -) -> Result, PredictorArchiveError> { - let archive = BoxGpt2PredictorArchive::open(path, name)?; - let archive = Box::new(archive); - Ok(archive) -} - -#[cfg(feature = "gpt2")] -fn predict(args: PredictArgs) -> anyhow::Result<()> { - let raw_input = if args.inputs.is_empty() { - eprintln!("Reading from stdin..."); - let mut buffer = String::new(); - io::stdin() - .read_to_string(&mut buffer) - .expect("reading stdin"); - buffer - } else { - args.inputs.join(" ") - }; - - let predictor_name = args.predictor_name.as_deref(); - let archive = load_predictor_archive(&args.archive, predictor_name)?; - let predictor = archive.predictor(); - - let mut writer: Box = if args.use_json { - Box::new(JsonWriter::new()) - } else { - Box::new(StdoutWriter) - }; - - let suggest_cfg = SpellerConfig::default(); - - let predictions = predictor.predict(&raw_input); - writer.write_predictions(&predictions); - - let has_speller = archive.metadata().map(|x| x.speller).unwrap_or(false); - if !args.disable_spelling_validation { - if !has_speller { - eprintln!("Error: requested spell checking but no speller present in archive!"); - } else { - let speller_archive = load_archive(&args.archive)?; - let speller = speller_archive.speller(); - - for word in predictions { - let cleaned_str = word.as_str().word_indices(); - for w in cleaned_str { - let is_correct = speller.clone().is_correct_with_config(&w.1, &suggest_cfg); - writer.write_correction(w.1, is_correct); - } - } - } - }; - - Ok(()) -} - -#[cfg(not(feature = "gpt2"))] -fn predict(_args: PredictArgs) -> anyhow::Result<()> { - eprintln!("ERROR: DivvunSpell was built without GPT2 support."); - eprintln!("If you built this using cargo, re-run the build with the following:"); - eprintln!(""); - eprintln!(" cargo build --features gpt2"); - eprintln!(""); - - std::process::exit(1); -} - fn main() -> anyhow::Result<()> { pretty_env_logger::init(); @@ -546,6 +430,5 @@ fn main() -> anyhow::Result<()> { None => Ok(()), Some(Command::Suggest(args)) => suggest(args), Some(Command::Tokenize(args)) => tokenize(args), - Some(Command::Predict(args)) => predict(args), } } diff --git a/divvunspell/Cargo.toml b/divvunspell/Cargo.toml index 3dbf2e3..bae4123 100644 --- a/divvunspell/Cargo.toml +++ b/divvunspell/Cargo.toml @@ -37,9 +37,6 @@ unic-ucd-common = "0.9.0" flatbuffers = { version = "0.6.1", optional = true } env_logger = { version = "0.11.2", optional = true } thiserror = "1.0.20" -tch = { version = "0.6.1", optional = true } -rust-bert = { version = "0.17.0", optional = true } -rust_tokenizers = { version = "7", optional = true } tempfile = "3.3.0" fs_extra = "1.2.0" eieio = "1.0.0" @@ -50,7 +47,6 @@ globwalk = "0.9.1" [features] compression = ["zip/deflate"] logging = ["env_logger"] -gpt2 = ["tch", "rust-bert", "rust_tokenizers"] cargo-clippy = [] # Internal features: unstable, not for external use! diff --git a/divvunspell/src/archive/boxf.rs b/divvunspell/src/archive/boxf.rs index e2328e2..6520a76 100644 --- a/divvunspell/src/archive/boxf.rs +++ b/divvunspell/src/archive/boxf.rs @@ -3,15 +3,9 @@ use std::sync::Arc; use box_format::BoxFileReader; -#[cfg(feature = "gpt2")] -use tempfile::TempDir; - -#[cfg(feature = "gpt2")] -use super::{error::PredictorArchiveError, meta::PredictorMetadata, PredictorArchive}; - use super::error::SpellerArchiveError; use super::{meta::SpellerMetadata, SpellerArchive}; -use crate::speller::{HfstSpeller, Speller, Analyzer}; +use crate::speller::{HfstSpeller, Speller}; use crate::transducer::{ thfst::{MemmapThfstChunkedTransducer, MemmapThfstTransducer}, Transducer, @@ -97,71 +91,7 @@ where self.speller.clone() } - fn analyser(&self) -> Arc { - self.speller.clone() - } - fn metadata(&self) -> Option<&SpellerMetadata> { self.metadata.as_ref() } } - -#[cfg(feature = "gpt2")] -pub struct BoxGpt2PredictorArchive { - #[allow(unused)] - model_path: std::path::PathBuf, - model: Arc, - _temp_dir: TempDir, // necessary to keep the temp dir alive until dropped - metadata: Option, -} - -#[cfg(feature = "gpt2")] -impl PredictorArchive for BoxGpt2PredictorArchive { - fn open( - path: &std::path::Path, - predictor_name: Option<&str>, - ) -> Result - where - Self: Sized, - { - let archive = BoxFileReader::open(path).map_err(|e| { - PredictorArchiveError::File(std::io::Error::new(std::io::ErrorKind::Other, e)) - })?; - let fs = BoxFilesystem::new(&archive); - - let predictor_name = predictor_name.unwrap_or("gpt2_predictor"); - let predictor_path = std::path::Path::new(predictor_name); - - // TODO: make this name customizable via metadata? - let file = fs - .open_file(predictor_path.join("meta.json")) - .map_err(|e| PredictorArchiveError::Io("Could not load meta.json".into(), e))?; - - let metadata = serde_json::from_reader(file)?; - - let temp_dir = fs.copy_to_temp_dir(&predictor_path).map_err(|e| { - PredictorArchiveError::Io( - format!("Could not copy '{}' to temp directory", predictor_name), - e, - ) - })?; - let model_path = temp_dir.path().join(&predictor_path); - - let model = Arc::new(crate::predictor::gpt2::Gpt2Predictor::new(&model_path)?); - - Ok(BoxGpt2PredictorArchive { - model_path, - model, - _temp_dir: temp_dir, - metadata, - }) - } - - fn predictor(&self) -> Arc { - self.model.clone() - } - - fn metadata(&self) -> Option<&PredictorMetadata> { - self.metadata.as_ref() - } -} diff --git a/divvunspell/src/archive/error.rs b/divvunspell/src/archive/error.rs index d037ebb..6f3b35c 100644 --- a/divvunspell/src/archive/error.rs +++ b/divvunspell/src/archive/error.rs @@ -1,59 +1,37 @@ //! Archive-related errors. use std::{ffi::OsString, io::Error}; -#[cfg(feature = "gpt2")] -use rust_bert::RustBertError; - use crate::transducer::TransducerError; +/// Errors that can occur when opening or using a speller archive. #[derive(Debug, thiserror::Error)] +#[non_exhaustive] pub enum SpellerArchiveError { + /// Error opening or reading the archive file #[error("File error")] File(#[source] Error), + /// I/O error while reading archive contents #[error("IO error")] Io(String, #[source] eieio::Error), + /// Error loading or parsing a transducer from the archive #[error("Transducer error")] Transducer(#[source] TransducerError), + /// Archive is missing required metadata #[error("Missing metadata")] NoMetadata, + /// Archive uses unsupported compression #[error("Unsupported compression")] UnsupportedCompressed, + /// Unknown error code encountered #[error("Unknown error code {0}")] Unknown(u8), - #[error("Unsupported file extension: {0:?}")] - UnsupportedExt(OsString), -} - -#[derive(Debug, thiserror::Error)] -pub enum PredictorArchiveError { - #[error("File error")] - File(#[source] Error), - - #[error("IO error")] - Io(String, #[source] Error), - - #[cfg(feature = "gpt2")] - #[error("Error loading bert model")] - Bert(#[from] RustBertError), - - #[error("Error deserialising JSON")] - Json(#[from] serde_json::Error), - - #[error("Missing metadata")] - NoMetadata, - - #[error("Unsupported compression")] - UnsupportedCompressed, - - #[error("Unknown error code {0}")] - Unknown(u8), - + /// File has an unsupported extension (expected .zhfst or .bhfst) #[error("Unsupported file extension: {0:?}")] UnsupportedExt(OsString), } diff --git a/divvunspell/src/archive/meta.rs b/divvunspell/src/archive/meta.rs index afbe290..0937962 100644 --- a/divvunspell/src/archive/meta.rs +++ b/divvunspell/src/archive/meta.rs @@ -9,19 +9,48 @@ use serde_xml_rs::{from_reader, Error, ParserConfig}; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadata { /// speller info - pub info: SpellerMetadataInfo, + info: SpellerMetadataInfo, /// acceptor metadata - pub acceptor: SpellerMetadataAcceptor, + acceptor: SpellerMetadataAcceptor, /// error model metadata - pub errmodel: SpellerMetadataErrmodel, + errmodel: SpellerMetadataErrmodel, } -/// Predictor metadata -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -pub struct PredictorMetadata { - /// whether speller is - #[serde(default)] - pub speller: bool, +impl SpellerMetadata { + /// Get the speller information + pub fn info(&self) -> &SpellerMetadataInfo { + &self.info + } + + /// Get the acceptor metadata + pub fn acceptor(&self) -> &SpellerMetadataAcceptor { + &self.acceptor + } + + /// Get the error model metadata + pub fn errmodel(&self) -> &SpellerMetadataErrmodel { + &self.errmodel + } + + /// Get mutable reference to acceptor metadata + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn acceptor_mut(&mut self) -> &mut SpellerMetadataAcceptor { + &mut self.acceptor + } + + /// Get mutable reference to error model metadata + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn errmodel_mut(&mut self) -> &mut SpellerMetadataErrmodel { + &mut self.errmodel + } } /// localised speller title @@ -38,13 +67,35 @@ pub struct SpellerTitle { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadataInfo { /// ISO-639 code of speller language - pub locale: String, + locale: String, /// localised, human readable titles of speller - pub title: Vec, + title: Vec, /// human readable description of speller - pub description: String, + description: String, /// creator and copyright owner of the speller - pub producer: String, + producer: String, +} + +impl SpellerMetadataInfo { + /// Get the ISO-639 locale code + pub fn locale(&self) -> &str { + &self.locale + } + + /// Get the localized titles + pub fn title(&self) -> &[SpellerTitle] { + &self.title + } + + /// Get the description + pub fn description(&self) -> &str { + &self.description + } + + /// Get the producer/creator + pub fn producer(&self) -> &str { + &self.producer + } } /// Acceptor metadata @@ -52,28 +103,92 @@ pub struct SpellerMetadataInfo { pub struct SpellerMetadataAcceptor { /// acceptor type: /// - `blah` if normal dictionary automaton - /// - `foo` if analyser + /// - `foo` if analyzer #[serde(rename = "type", default)] - pub type_: String, + type_: String, /// locally unique id for this acceptor - pub id: String, + id: String, /// localised human readable titles of speller - pub title: Vec, + title: Vec, /// human readable description of the acceptor - pub description: String, + description: String, /// marker for incomplete strings - pub continuation: Option, + continuation: Option, +} + +impl SpellerMetadataAcceptor { + /// Get the acceptor type + pub fn type_(&self) -> &str { + &self.type_ + } + + /// Get the acceptor ID + pub fn id(&self) -> &str { + &self.id + } + + /// Get the localized titles + pub fn title(&self) -> &[SpellerTitle] { + &self.title + } + + /// Get the description + pub fn description(&self) -> &str { + &self.description + } + + /// Get the continuation marker for incomplete strings + pub fn continuation(&self) -> Option<&str> { + self.continuation.as_deref() + } + + /// Set the acceptor ID + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn set_id(&mut self, id: String) { + self.id = id; + } } /// Error model metadata #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SpellerMetadataErrmodel { /// locally unique id for the error model - pub id: String, + id: String, /// localised human readable titles for the error model - pub title: Vec, + title: Vec, /// human readable description of the error model - pub description: String, + description: String, +} + +impl SpellerMetadataErrmodel { + /// Get the error model ID + pub fn id(&self) -> &str { + &self.id + } + + /// Get the localized titles + pub fn title(&self) -> &[SpellerTitle] { + &self.title + } + + /// Get the description + pub fn description(&self) -> &str { + &self.description + } + + /// Set the error model ID + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn set_id(&mut self, id: String) { + self.id = id; + } } impl std::str::FromStr for SpellerMetadata { @@ -97,19 +212,6 @@ impl SpellerMetadata { } } -impl PredictorMetadata { - pub fn from_bytes(bytes: &[u8]) -> Result { - let mut reader = ParserConfig::new() - .trim_whitespace(true) - .ignore_comments(true) - .coalesce_characters(true) - .create_reader(bytes) - .into_inner(); - - from_reader(&mut reader) - } -} - #[test] fn test_xml_parse() { use std::str::FromStr; diff --git a/divvunspell/src/archive/mod.rs b/divvunspell/src/archive/mod.rs index 10d3631..fc9746e 100644 --- a/divvunspell/src/archive/mod.rs +++ b/divvunspell/src/archive/mod.rs @@ -7,16 +7,12 @@ pub mod error; pub mod meta; pub mod zip; -use error::PredictorArchiveError; - pub use self::{boxf::BoxSpellerArchive, zip::ZipSpellerArchive}; use self::{ - boxf::ThfstChunkedBoxSpellerArchive, - error::SpellerArchiveError, - meta::{PredictorMetadata, SpellerMetadata}, + boxf::ThfstChunkedBoxSpellerArchive, error::SpellerArchiveError, meta::SpellerMetadata, }; -use crate::{predictor::Predictor, speller::{Speller, Analyzer}}; +use crate::speller::Speller; pub(crate) struct TempMmap { mmap: Arc, @@ -46,24 +42,14 @@ pub trait SpellerArchive { where Self: Sized; - /// retrieve spell-checker. + /// Retrieve spell-checker. + /// + /// The returned speller can perform both spell checking and morphological analysis + /// depending on the `OutputMode` passed to `suggest()`. fn speller(&self) -> Arc; - fn analyser(&self) -> Arc; - /// retrieve metadata. - fn metadata(&self) -> Option<&SpellerMetadata>; -} -/// Predictor archive is a file read intoo a predictor with metadata. -pub trait PredictorArchive { - /// Read and parse a predictor archive. - fn open(path: &Path, predictor_name: Option<&str>) -> Result - where - Self: Sized; - - /// Retrieve predictor. - fn predictor(&self) -> Arc; - /// retrieve metadata. - fn metadata(&self) -> Option<&PredictorMetadata>; + /// Retrieve metadata. + fn metadata(&self) -> Option<&SpellerMetadata>; } /// Reads a speller archive. @@ -113,7 +99,7 @@ pub(crate) mod ffi { >, ) -> Result> { match handle.metadata() { - Some(v) => Ok(v.info.locale.to_string()), + Some(v) => Ok(v.info().locale().to_string()), None => Err(Box::new(SpellerArchiveError::NoMetadata) as _), } } diff --git a/divvunspell/src/archive/zip.rs b/divvunspell/src/archive/zip.rs index 48d7cc9..710a729 100644 --- a/divvunspell/src/archive/zip.rs +++ b/divvunspell/src/archive/zip.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use super::error::SpellerArchiveError; use super::meta::SpellerMetadata; use super::{MmapRef, SpellerArchive, TempMmap}; -use crate::speller::{HfstSpeller, Speller, Analyzer}; +use crate::speller::{HfstSpeller, Speller}; use crate::transducer::hfst::HfstTransducer; pub type HfstZipSpeller = @@ -82,8 +82,8 @@ impl SpellerArchive for ZipSpellerArchive { .map_err(|e| SpellerArchiveError::Io("index.xml".into(), e.into()))?; let metadata = SpellerMetadata::from_bytes(&*metadata_mmap.map()).expect("meta"); - let acceptor_id = &metadata.acceptor.id; - let errmodel_id = &metadata.errmodel.id; + let acceptor_id = metadata.acceptor().id(); + let errmodel_id = metadata.errmodel().id(); let acceptor_mmap = mmap_by_name(&mut file, &mut archive, &acceptor_id) .map_err(|e| SpellerArchiveError::Io(acceptor_id.into(), e.into()))?; @@ -103,10 +103,6 @@ impl SpellerArchive for ZipSpellerArchive { self.speller.clone() } - fn analyser(&self) -> Arc { - self.speller.clone() - } - fn metadata(&self) -> Option<&SpellerMetadata> { Some(&self.metadata) } diff --git a/divvunspell/src/constants.rs b/divvunspell/src/constants.rs index 1d75d2a..acb5b17 100644 --- a/divvunspell/src/constants.rs +++ b/divvunspell/src/constants.rs @@ -1,6 +1,6 @@ -pub const INDEX_TABLE_SIZE: usize = 6; -pub const TRANS_TABLE_SIZE: usize = 12; -pub const TARGET_TABLE: u32 = 2_147_483_648; +pub(crate) const INDEX_TABLE_SIZE: usize = 6; +pub(crate) const TRANS_TABLE_SIZE: usize = 12; +pub(crate) const TARGET_TABLE: u32 = 2_147_483_648; #[cfg(test)] mod tests { diff --git a/divvunspell/src/lib.rs b/divvunspell/src/lib.rs index 7fddde2..5626f11 100644 --- a/divvunspell/src/lib.rs +++ b/divvunspell/src/lib.rs @@ -33,11 +33,20 @@ pub mod archive; pub mod ffi; pub mod paths; -pub mod predictor; pub mod speller; pub mod tokenizer; pub mod transducer; + +/// Virtual filesystem abstraction (internal use only) +/// +/// **Warning:** This module is only for internal tooling use and should not be used in normal applications. +/// It may be removed or significantly changed in a future version without a major version bump. +/// Use the higher-level [`archive`] module APIs instead. +#[doc(hidden)] pub mod vfs; pub(crate) mod constants; -pub(crate) mod types; +/// Core types for transducers and spell-checking. +/// +/// This module contains type aliases and enums used throughout the transducer API. +pub mod types; diff --git a/divvunspell/src/predictor/gpt2.rs b/divvunspell/src/predictor/gpt2.rs deleted file mode 100644 index c76a731..0000000 --- a/divvunspell/src/predictor/gpt2.rs +++ /dev/null @@ -1,58 +0,0 @@ -use std::path::Path; -use std::sync::Arc; - -use parking_lot::Mutex; -use rust_bert::pipelines::common::ModelType; -use rust_bert::pipelines::text_generation::{TextGenerationConfig, TextGenerationModel}; -use rust_bert::resources::{LocalResource, Resource}; -use rust_bert::RustBertError; - -use super::Predictor; - -pub struct Gpt2Predictor { - model: Mutex, -} - -impl Gpt2Predictor { - pub fn new(model_path: &Path) -> Result { - let config_resource = Resource::Local(LocalResource { - local_path: model_path.join("config.json"), - }); - let vocab_resource = Resource::Local(LocalResource { - local_path: model_path.join("vocab.json"), - }); - let merges_resource = Resource::Local(LocalResource { - local_path: model_path.join("merges.txt"), - }); - let weights_resource = Resource::Local(LocalResource { - local_path: model_path.join("rust_model.ot"), - }); - - let generate_config = TextGenerationConfig { - model_resource: weights_resource, - vocab_resource: vocab_resource, - merges_resource: merges_resource, - config_resource: config_resource, - model_type: ModelType::GPT2, - max_length: 24, - do_sample: true, - num_beams: 1, - temperature: 1.1, - num_return_sequences: 1, - ..Default::default() - }; - let model = Mutex::new(TextGenerationModel::new(generate_config)?); - Ok(Self { model }) - } - - fn generate(&self, raw_input: &str) -> Vec { - let guard = self.model.lock(); - guard.generate(&[raw_input], None) - } -} - -impl Predictor for Gpt2Predictor { - fn predict(self: Arc, raw_input: &str) -> Vec { - self.generate(raw_input) - } -} diff --git a/divvunspell/src/predictor/mod.rs b/divvunspell/src/predictor/mod.rs deleted file mode 100644 index 672ccfa..0000000 --- a/divvunspell/src/predictor/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Autocorrect type spell-checking that predicts next word. -#[cfg(feature = "gpt2")] -pub mod gpt2; - -use std::sync::Arc; - -pub trait Predictor { - fn predict(self: Arc, raw_input: &str) -> Vec; -} diff --git a/divvunspell/src/speller/mod.rs b/divvunspell/src/speller/mod.rs index 6063719..8c6ef93 100644 --- a/divvunspell/src/speller/mod.rs +++ b/divvunspell/src/speller/mod.rs @@ -18,6 +18,26 @@ use crate::types::{SymbolNumber, Weight}; pub mod suggestion; mod worker; +/// Controls whether morphological tags are preserved in FST output. +/// +/// When traversing an FST, epsilon transitions can either preserve their symbols +/// (keeping morphological tags like "+V", "+Noun", etc.) or convert them to true +/// epsilons (stripping the tags from the output). +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) enum OutputMode { + /// Strip morphological tags from output. + /// + /// Used for spelling correction where you want clean word forms without tags. + /// Example: "run" instead of "run+V+PresPartc" + WithoutTags, + + /// Keep morphological tags in output. + /// + /// Used for morphological analysis where you want to see the linguistic structure. + /// Example: "run+V+PresPartc" instead of "run" + WithTags, +} + /// configurable extra penalties for edit distance #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] @@ -128,41 +148,71 @@ const fn default_node_pool_size() -> usize { const fn default_recase() -> bool { true } -/// can determine if string is a correct word or suggest corrections. -/// Also with SpellerConfig. +/// FST-based spell checker and morphological analyzer. +/// +/// This trait provides methods for spell checking and morphological analysis +/// using finite-state transducers. The same FST traversal logic is used for both +/// operations - the difference is controlled by the `OutputMode`: +/// +/// - `OutputMode::WithoutTags` strips morphological tags (for spelling correction) +/// - `OutputMode::WithTags` preserves morphological tags (for morphological analysis) pub trait Speller { - /// check if the word is correctly spelled + /// Check if the word is correctly spelled + #[must_use] fn is_correct(self: Arc, word: &str) -> bool; - /// check if word is correctly spelled with config recasing etc. + + /// Check if word is correctly spelled with config (handles recasing, etc.) + #[must_use] fn is_correct_with_config(self: Arc, word: &str, config: &SpellerConfig) -> bool; - /// suggest corrections to word + + /// Generate suggestions or analyses for a word. + #[must_use] fn suggest(self: Arc, word: &str) -> Vec; - /// suggest corrections with recasing and reweighting from config + + /// Generate suggestions with config options (recasing, reweighting, etc.) + #[must_use] fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; -} -/// can provide in-depth analyses along with suggestions -pub trait Analyzer: Speller { - /// analyse the input word form + /// Analyze the input word form. + /// + /// Performs lexicon-only traversal (no error model) to get morphological analyses + /// of exactly what was typed. Does not generate spelling corrections. + #[must_use] fn analyze_input(self: Arc, word: &str) -> Vec; - /// analyse input word form with recasing and stuff from configs + + /// Analyze input word form with config options. + #[must_use] fn analyze_input_with_config( self: Arc, word: &str, config: &SpellerConfig, ) -> Vec; - /// analyse the suggested word forms + + /// Analyze the suggested word forms. + /// + /// Generates spelling corrections using the error model, then returns them with + /// morphological tags preserved (equivalent to `suggest(word, OutputMode::WithTags)`). + #[must_use] fn analyze_output(self: Arc, word: &str) -> Vec; - /// analyse the suggested word forms with recasing and stuff from configs + + /// Analyze suggested word forms with config options. + #[must_use] fn analyze_output_with_config( self: Arc, word: &str, config: &SpellerConfig, ) -> Vec; - /// create suggestion list and use their analyses for finetununt - fn analyse_suggest(self: Arc, word: &str) -> Vec; - /// create suggestion list and use analyses to finetune with config - fn analyse_suggest_with_config( + + /// Create suggestion list and use their analyses for filtering. + /// + /// Gets spelling corrections, analyzes each one, and filters based on + /// morphological analysis results. + #[must_use] + fn analyze_suggest(self: Arc, word: &str) -> Vec; + + /// Create suggestion list and use analyses for filtering with config. + #[must_use] + fn analyze_suggest_with_config( self: Arc, word: &str, config: &SpellerConfig, @@ -202,8 +252,12 @@ where config ); for word in std::iter::once(word.into()).chain(words.into_iter()) { - let worker = SpellerWorker::new(self.clone(), - self.to_input_vec(&word), config.clone(), false); + let worker = SpellerWorker::new( + self.clone(), + self.to_input_vec(&word), + config.clone(), + OutputMode::WithoutTags, + ); if worker.is_correct() { return true; @@ -224,42 +278,26 @@ where } fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { - use crate::tokenizer::case_handling::*; - - if word.len() == 0 { - return vec![]; - } - - if let Some(reweight) = config.reweight.as_ref() { - let case_handler = word_variants(word); - - self.suggest_case(case_handler, config, reweight) - } else { - self.suggest_single(word, config) - } + self._suggest_with_config(word, config, OutputMode::WithoutTags) } -} -impl Analyzer for HfstSpeller -where - F: crate::vfs::File + Send, - T: Transducer + Send, - U: Transducer + Send, -{ - #[allow(clippy::wrong_self_convention)] fn analyze_input_with_config( self: Arc, word: &str, config: &SpellerConfig, ) -> Vec { - if word.len() == 0 { + if word.is_empty() { return vec![]; } - let worker = SpellerWorker::new(self.clone(), - self.to_input_vec(&word), config.clone(), false); + let worker = SpellerWorker::new( + self.clone(), + self.to_input_vec(word), + config.clone(), + OutputMode::WithTags, + ); - log::trace!("Beginning analyze with config in mod"); + log::trace!("Beginning analyze_input with config"); worker.analyze() } @@ -268,41 +306,30 @@ where self.analyze_input_with_config(word, &SpellerConfig::default()) } - #[inline] - fn analyze_output(self: Arc, word: &str) -> Vec { - self.analyze_output_with_config(word, &SpellerConfig::default()) - } - - #[inline] - fn analyse_suggest(self: Arc, word: &str) -> Vec { - self.analyse_suggest_with_config(word, &SpellerConfig::default()) - } - fn analyze_output_with_config( self: Arc, word: &str, config: &SpellerConfig, ) -> Vec { - if word.len() == 0 { - return vec![]; - } - log::trace!("Beginning analyze suggest with config in mod"); - let worker = SpellerWorker::new(self.clone(), - self.to_input_vec(word), config.clone(), false); + self._suggest_with_config(word, config, OutputMode::WithTags) + } - worker.suggest() + #[inline] + fn analyze_output(self: Arc, word: &str) -> Vec { + self.analyze_output_with_config(word, &SpellerConfig::default()) } - fn analyse_suggest_with_config( + fn analyze_suggest_with_config( self: Arc, word: &str, - config: &SpellerConfig + config: &SpellerConfig, ) -> Vec { let mut suggs = self.clone().suggest_with_config(word, config); suggs.retain(|sugg| { log::trace!("suggestion {}", sugg.value); - let analyses = self.clone().analyze_input_with_config(sugg.value.as_str(), - config); + let analyses = self + .clone() + .analyze_input_with_config(sugg.value.as_str(), config); let mut all_filtered = true; for analysis in analyses { log::trace!("-> {}", analysis.value); @@ -317,6 +344,10 @@ where suggs } + #[inline] + fn analyze_suggest(self: Arc, word: &str) -> Vec { + self.analyze_suggest_with_config(word, &SpellerConfig::default()) + } } /// a speller consisting of two HFST automata @@ -351,6 +382,27 @@ where }) } + fn _suggest_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + mode: OutputMode, + ) -> Vec { + use crate::tokenizer::case_handling::*; + + if word.len() == 0 { + return vec![]; + } + + if let Some(reweight) = config.reweight.as_ref() { + let case_handler = word_variants(word); + + self.suggest_case(case_handler, config, reweight, mode) + } else { + self.suggest_single(word, config, mode) + } + } + /// get the error model automaton pub fn mutator(&self) -> &T { &self.mutator @@ -382,9 +434,14 @@ where .collect() } - fn suggest_single(self: Arc, word: &str, config: &SpellerConfig) -> Vec { - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), - config.clone(), true); + fn suggest_single( + self: Arc, + word: &str, + config: &SpellerConfig, + mode: OutputMode, + ) -> Vec { + let worker = + SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone(), mode); log::trace!("suggesting single {}", word); worker.suggest() @@ -395,6 +452,7 @@ where case: CaseHandler, config: &SpellerConfig, reweight: &ReweightingConfig, + output_mode: OutputMode, ) -> Vec { use crate::tokenizer::case_handling::*; @@ -409,8 +467,12 @@ where for word in std::iter::once(&original_input).chain(words.iter()) { log::trace!("suggesting for word {}", word); - let worker = SpellerWorker::new(self.clone(), - self.to_input_vec(&word), config.clone(), true); + let worker = SpellerWorker::new( + self.clone(), + self.to_input_vec(&word), + config.clone(), + output_mode, + ); let mut suggestions = worker.suggest(); match mutation { diff --git a/divvunspell/src/speller/worker.rs b/divvunspell/src/speller/worker.rs index df6edd7..282dc19 100644 --- a/divvunspell/src/speller/worker.rs +++ b/divvunspell/src/speller/worker.rs @@ -5,14 +5,14 @@ use std::sync::Arc; use lifeguard::{Pool, Recycled}; -use super::{HfstSpeller, SpellerConfig}; +use super::{HfstSpeller, OutputMode, SpellerConfig}; use crate::speller::suggestion::Suggestion; use crate::transducer::tree_node::TreeNode; use crate::transducer::Transducer; use crate::types::{SymbolNumber, Weight}; #[inline(always)] -fn speller_start_node(pool: &Pool, size: usize) -> Vec> { +fn speller_start_node(pool: &Pool, size: usize) -> Vec> { let start_node = TreeNode::empty(pool, vec![0; size]); let mut nodes = Vec::with_capacity(256); nodes.push(start_node); @@ -23,7 +23,7 @@ pub struct SpellerWorker, U: Transducer speller: Arc>, input: Vec, config: SpellerConfig, - mode_correcting: bool, + output_mode: OutputMode, } #[allow(clippy::too_many_arguments)] @@ -38,13 +38,13 @@ where speller: Arc>, input: Vec, config: SpellerConfig, - mode_correcting: bool, + output_mode: OutputMode, ) -> SpellerWorker { SpellerWorker { speller, input, config, - mode_correcting, + output_mode, } } @@ -73,11 +73,10 @@ where if self .is_under_weight_limit(max_weight, next_node.weight() + transition_weight) { - let new_node = if self.mode_correcting { - next_node.update_lexicon(pool, - transition.clone_with_epsilon_symbol()) - } else { - next_node.update_lexicon(pool, transition) + let new_node = match self.output_mode { + OutputMode::WithoutTags => next_node + .update_lexicon(pool, transition.clone_with_epsilon_symbol()), + OutputMode::WithTags => next_node.update_lexicon(pool, transition), }; output_nodes.push(new_node); } @@ -228,26 +227,23 @@ where ); if is_under_weight_limit { - let new_node = if self.mode_correcting { - next_node.update( + let new_node = match self.output_mode { + OutputMode::WithoutTags => next_node.update( pool, input_sym, - Some(next_node.input_state + input_increment as - u32), + Some(next_node.input_state + input_increment as u32), mutator_state, noneps_trans.target().unwrap(), noneps_trans.weight().unwrap() + mutator_weight, - ) - - } else { - next_node.update( - pool, - sym, - Some(next_node.input_state + input_increment as u32), - mutator_state, - noneps_trans.target().unwrap(), - noneps_trans.weight().unwrap() + mutator_weight, - ) + ), + OutputMode::WithTags => next_node.update( + pool, + sym, + Some(next_node.input_state + input_increment as u32), + mutator_state, + noneps_trans.target().unwrap(), + noneps_trans.weight().unwrap() + mutator_weight, + ), }; output_nodes.push(new_node); } @@ -548,10 +544,10 @@ where .string_from_symbols(&next_node.string); let weight = next_node.weight() + self - .speller - .lexicon() - .final_weight(next_node.lexicon_state) - .unwrap(); + .speller + .lexicon() + .final_weight(next_node.lexicon_state) + .unwrap(); let entry = lookups.entry(string).or_insert(weight); if *entry > weight { *entry = weight; @@ -562,7 +558,6 @@ where analyses = self.generate_sorted_suggestions(&lookups); } analyses - } pub(crate) fn suggest(&self) -> Vec { diff --git a/divvunspell/src/tokenizer/mod.rs b/divvunspell/src/tokenizer/mod.rs index 6b59e9c..a0e871a 100644 --- a/divvunspell/src/tokenizer/mod.rs +++ b/divvunspell/src/tokenizer/mod.rs @@ -27,8 +27,8 @@ impl<'a> Iterator for WordIndices<'a> { pub trait Tokenize { fn word_bound_indices(&self) -> WordBoundIndices<'_>; fn word_indices(&self) -> WordIndices<'_>; - fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices; - fn words_with_alphabet(&self, alphabet: Vec) -> Words; + fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices<'_>; + fn words_with_alphabet(&self, alphabet: Vec) -> Words<'_>; } impl Tokenize for str { @@ -42,11 +42,11 @@ impl Tokenize for str { } } - fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices { + fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices<'_> { WordBoundIndices::new_with_alphabet(self, alphabet) } - fn words_with_alphabet(&self, alphabet: Vec) -> Words { + fn words_with_alphabet(&self, alphabet: Vec) -> Words<'_> { Words::new_with_alphabet(self, |s| s.chars().any(|ch| ch.is_alphanumeric()), alphabet) } } diff --git a/divvunspell/src/transducer/convert.rs b/divvunspell/src/transducer/convert.rs index cf18c41..50baa1c 100644 --- a/divvunspell/src/transducer/convert.rs +++ b/divvunspell/src/transducer/convert.rs @@ -89,9 +89,7 @@ impl ConvertFrom for thfst::MemmapTransitionTable(input_symbol).unwrap(); writer.write_u16::(output_symbol).unwrap(); writer.write_u32::(target).unwrap(); - writer - .write_u32::(unsafe { std::mem::transmute::(weight) }) - .unwrap(); + writer.write_u32::(weight.to_bits()).unwrap(); } Ok(()) diff --git a/divvunspell/src/transducer/mod.rs b/divvunspell/src/transducer/mod.rs index b8fc20d..4e01e85 100644 --- a/divvunspell/src/transducer/mod.rs +++ b/divvunspell/src/transducer/mod.rs @@ -12,9 +12,8 @@ mod alphabet; mod symbol_transition; pub(crate) mod tree_node; -pub(crate) use self::alphabet::TransducerAlphabet; - -use self::symbol_transition::SymbolTransition; +pub use self::alphabet::TransducerAlphabet; +pub use self::symbol_transition::SymbolTransition; use crate::types::{SymbolNumber, TransitionTableIndex, Weight}; use crate::vfs::{self, Filesystem}; @@ -46,6 +45,11 @@ impl TransducerError { } /// A file-based finite-state transducer. +/// +/// This trait defines the interface for finite-state transducers that can be loaded +/// from files and used for spell-checking and morphological analysis. +/// +/// Implementors can provide custom transducer formats beyond the built-in HFST and THFST formats. pub trait Transducer: Sized { /// file extension. const FILE_EXT: &'static str; diff --git a/divvunspell/src/transducer/symbol_transition.rs b/divvunspell/src/transducer/symbol_transition.rs index cd2119c..46c2ab8 100644 --- a/divvunspell/src/transducer/symbol_transition.rs +++ b/divvunspell/src/transducer/symbol_transition.rs @@ -1,10 +1,16 @@ use crate::types::{SymbolNumber, TransitionTableIndex, Weight}; +/// Represents a transition in a finite-state transducer. +/// +/// A transition connects states in the FST and carries a symbol and weight. #[derive(Debug, Clone)] pub struct SymbolTransition { - target: Option, - symbol: Option, - weight: Option, + /// Target state index, or None if this is a final state + pub target: Option, + /// Input/output symbol number + pub symbol: Option, + /// Transition weight + pub weight: Option, } impl SymbolTransition { diff --git a/divvunspell/src/transducer/tree_node.rs b/divvunspell/src/transducer/tree_node.rs index c7b75ef..61e58cb 100644 --- a/divvunspell/src/transducer/tree_node.rs +++ b/divvunspell/src/transducer/tree_node.rs @@ -9,13 +9,13 @@ use crate::types::{ }; #[derive(Debug, Clone)] -pub struct TreeNode { - pub lexicon_state: TransitionTableIndex, - pub mutator_state: TransitionTableIndex, - pub input_state: u32, - pub weight: f32, - pub flag_state: FlagDiacriticState, - pub string: Vec, +pub(crate) struct TreeNode { + pub(crate) lexicon_state: TransitionTableIndex, + pub(crate) mutator_state: TransitionTableIndex, + pub(crate) input_state: u32, + pub(crate) weight: f32, + pub(crate) flag_state: FlagDiacriticState, + pub(crate) string: Vec, } impl std::cmp::PartialEq for TreeNode { diff --git a/docs/src/divvunspell/archive/meta.rs.html b/docs/src/divvunspell/archive/meta.rs.html index e7ce629..b635b89 100644 --- a/docs/src/divvunspell/archive/meta.rs.html +++ b/docs/src/divvunspell/archive/meta.rs.html @@ -211,7 +211,7 @@ <locale>se</locale> <title>Giellatekno/Divvun/UiT fst-based speller for Northern Sami</title> <description>This is an fst-based speller for Northern Sami. It is based - on the normative subset of the morphological analyser for Northern Sami. + on the normative subset of the morphological analyzer for Northern Sami. The source code can be found at: https://victorio.uit.no/langtech/trunk/langs/sme/ License: GPL3+.</description> diff --git a/thfst-tools/src/main.rs b/thfst-tools/src/main.rs index f5a8e96..683d38d 100644 --- a/thfst-tools/src/main.rs +++ b/thfst-tools/src/main.rs @@ -142,8 +142,10 @@ fn convert_zhfst_to_bhfst(zhfst_path: &Path) -> Result<(), std::io::Error> { Some(metadata) => { println!("Converting \"index.xml\" to \"meta.json\"..."); let mut m = metadata.to_owned(); - m.acceptor.id = metadata.acceptor.id.replace(".hfst", ".thfst"); - m.errmodel.id = metadata.errmodel.id.replace(".hfst", ".thfst"); + m.acceptor_mut() + .set_id(metadata.acceptor().id().replace(".hfst", ".thfst")); + m.errmodel_mut() + .set_id(metadata.errmodel().id().replace(".hfst", ".thfst")); Some(serde_json::to_string_pretty(&m)?) } None => None, From 7e5c268194663b67b9eef3e077033d9572385fe0 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 9 Oct 2025 18:20:54 +0200 Subject: [PATCH 19/21] Reorganise repository --- Cargo.lock | 2 +- Cargo.toml | 63 +++++- LICENSE-MIT | 6 +- {divvunspell-bin => cli}/Cargo.toml | 4 +- {accuracy => cli}/LICENSE | 0 {divvunspell-bin => cli}/src/main.rs | 0 {accuracy => crates/accuracy}/Cargo.toml | 2 +- {divvunspell-bin => crates/accuracy}/LICENSE | 0 {accuracy => crates/accuracy}/src/main.rs | 0 {regtest => crates/regtest}/Cargo.toml | 2 +- {regtest => crates/regtest}/src/main.rs | 0 .../thfst-tools}/Cargo.toml | 2 +- {thfst-tools => crates/thfst-tools}/LICENSE | 0 .../thfst-tools}/src/main.rs | 0 divvunspell/Cargo.toml | 54 ----- divvunspell/LICENSE-APACHE | 202 ------------------ divvunspell/LICENSE-MIT | 21 -- .../examples => examples}/find-path.rs | 0 {divvunspell/src => src}/archive/boxf.rs | 0 {divvunspell/src => src}/archive/error.rs | 0 {divvunspell/src => src}/archive/meta.rs | 0 {divvunspell/src => src}/archive/mod.rs | 0 {divvunspell/src => src}/archive/zip.rs | 0 {divvunspell/src => src}/constants.rs | 0 {divvunspell/src => src}/ffi/fbs/mod.rs | 0 {divvunspell/src => src}/ffi/fbs/tokenizer.rs | 0 {divvunspell/src => src}/ffi/mod.rs | 0 {divvunspell/src => src}/lib.rs | 0 {divvunspell/src => src}/paths.rs | 0 {divvunspell/src => src}/speller/mod.rs | 0 .../src => src}/speller/suggestion.rs | 0 {divvunspell/src => src}/speller/worker.rs | 0 .../src => src}/tokenizer/case_handling.rs | 0 {divvunspell/src => src}/tokenizer/mod.rs | 0 .../tokenizer/tables/word_break.rsv | 0 {divvunspell/src => src}/tokenizer/word.rs | 0 .../src => src}/tokenizer/word_break.rs | 0 .../src => src}/transducer/alphabet.rs | 0 .../src => src}/transducer/convert.rs | 0 .../src => src}/transducer/hfst/alphabet.rs | 0 .../src => src}/transducer/hfst/header.rs | 0 .../transducer/hfst/index_table.rs | 0 .../src => src}/transducer/hfst/mod.rs | 0 .../transducer/hfst/transition_table.rs | 0 {divvunspell/src => src}/transducer/mod.rs | 0 .../transducer/symbol_transition.rs | 0 .../src => src}/transducer/thfst/chunked.rs | 0 .../transducer/thfst/index_table.rs | 0 .../src => src}/transducer/thfst/mod.rs | 0 .../transducer/thfst/transition_table.rs | 0 .../src => src}/transducer/tree_node.rs | 0 {divvunspell/src => src}/types.rs | 0 {divvunspell/src => src}/vfs.rs | 0 53 files changed, 67 insertions(+), 291 deletions(-) rename {divvunspell-bin => cli}/Cargo.toml (80%) rename {accuracy => cli}/LICENSE (100%) rename {divvunspell-bin => cli}/src/main.rs (100%) rename {accuracy => crates/accuracy}/Cargo.toml (84%) rename {divvunspell-bin => crates/accuracy}/LICENSE (100%) rename {accuracy => crates/accuracy}/src/main.rs (100%) rename {regtest => crates/regtest}/Cargo.toml (55%) rename {regtest => crates/regtest}/src/main.rs (100%) rename {thfst-tools => crates/thfst-tools}/Cargo.toml (76%) rename {thfst-tools => crates/thfst-tools}/LICENSE (100%) rename {thfst-tools => crates/thfst-tools}/src/main.rs (100%) delete mode 100644 divvunspell/Cargo.toml delete mode 100644 divvunspell/LICENSE-APACHE delete mode 100644 divvunspell/LICENSE-MIT rename {divvunspell/examples => examples}/find-path.rs (100%) rename {divvunspell/src => src}/archive/boxf.rs (100%) rename {divvunspell/src => src}/archive/error.rs (100%) rename {divvunspell/src => src}/archive/meta.rs (100%) rename {divvunspell/src => src}/archive/mod.rs (100%) rename {divvunspell/src => src}/archive/zip.rs (100%) rename {divvunspell/src => src}/constants.rs (100%) rename {divvunspell/src => src}/ffi/fbs/mod.rs (100%) rename {divvunspell/src => src}/ffi/fbs/tokenizer.rs (100%) rename {divvunspell/src => src}/ffi/mod.rs (100%) rename {divvunspell/src => src}/lib.rs (100%) rename {divvunspell/src => src}/paths.rs (100%) rename {divvunspell/src => src}/speller/mod.rs (100%) rename {divvunspell/src => src}/speller/suggestion.rs (100%) rename {divvunspell/src => src}/speller/worker.rs (100%) rename {divvunspell/src => src}/tokenizer/case_handling.rs (100%) rename {divvunspell/src => src}/tokenizer/mod.rs (100%) rename {divvunspell/src => src}/tokenizer/tables/word_break.rsv (100%) rename {divvunspell/src => src}/tokenizer/word.rs (100%) rename {divvunspell/src => src}/tokenizer/word_break.rs (100%) rename {divvunspell/src => src}/transducer/alphabet.rs (100%) rename {divvunspell/src => src}/transducer/convert.rs (100%) rename {divvunspell/src => src}/transducer/hfst/alphabet.rs (100%) rename {divvunspell/src => src}/transducer/hfst/header.rs (100%) rename {divvunspell/src => src}/transducer/hfst/index_table.rs (100%) rename {divvunspell/src => src}/transducer/hfst/mod.rs (100%) rename {divvunspell/src => src}/transducer/hfst/transition_table.rs (100%) rename {divvunspell/src => src}/transducer/mod.rs (100%) rename {divvunspell/src => src}/transducer/symbol_transition.rs (100%) rename {divvunspell/src => src}/transducer/thfst/chunked.rs (100%) rename {divvunspell/src => src}/transducer/thfst/index_table.rs (100%) rename {divvunspell/src => src}/transducer/thfst/mod.rs (100%) rename {divvunspell/src => src}/transducer/thfst/transition_table.rs (100%) rename {divvunspell/src => src}/transducer/tree_node.rs (100%) rename {divvunspell/src => src}/types.rs (100%) rename {divvunspell/src => src}/vfs.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 998346d..54a0e3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -541,7 +541,7 @@ dependencies = [ ] [[package]] -name = "divvunspell-bin" +name = "divvunspell-cli" version = "1.0.0" dependencies = [ "anyhow", diff --git a/Cargo.toml b/Cargo.toml index c434ef9..3203476 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,64 @@ +[package] +name = "divvunspell" +description = "Spell checking library for ZHFST/BHFST spellers, with case handling and tokenization support." +version = "1.0.0-beta.5" +authors = ["Brendan Molloy "] +edition = "2021" +license = "MIT OR Apache-2.0" +repository = "https://github.com/divvun/divvunspell" + +[lib] +name = "divvunspell" +crate-type = ["rlib", "staticlib", "cdylib"] + +[dependencies] +libc = "0.2" +memmap2 = "0.9.4" +byteorder = "1.3.4" +serde = { version = "1.0.116", features = ["derive"] } +serde_json = "1.0.57" +serde-xml-rs = { version = "0.6.0", default-features = false } +zip = { version = "0.5", default-features = false } +unic-segment = "0.9.0" +unic-char-range = "0.9.0" +unic-char-property = "0.9.0" +unic-ucd-category = "0.9.0" +unic-emoji-char = "0.9.0" +parking_lot = "0.11.2" +hashbrown = { version = "0.11", features = ["serde"] } +lifeguard = "0.6.1" +smol_str = { version = "0.2.1", features = ["serde"] } +box-format = { version = "0.3.2", features = ["reader"], default-features = false } +itertools = "0.12.1" +strsim = "0.11.0" +log = "0.4.11" +cffi = { git = "https://github.com/cffi-rs/cffi", optional = true } +unic-ucd-common = "0.9.0" +flatbuffers = { version = "0.6.1", optional = true } +env_logger = { version = "0.11.2", optional = true } +thiserror = "1.0.20" +tempfile = "3.3.0" +fs_extra = "1.2.0" +eieio = "1.0.0" +pathos = "0.3.0" +language-tags = "0.3.2" +globwalk = "0.9.1" + +[features] +compression = ["zip/deflate"] +logging = ["env_logger"] +cargo-clippy = [] + +# Internal features: unstable, not for external use! +internal_convert = [] +internal_ffi = ["flatbuffers", "logging", "cffi"] + [workspace] resolver = "2" members = [ - "divvunspell", - "accuracy", - "divvunspell-bin", - "thfst-tools", - "regtest", + ".", + "cli", + "crates/*" ] [profile.dev] diff --git a/LICENSE-MIT b/LICENSE-MIT index c5469f7..07ad97c 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ -Copyright (c) 2017-2021 Brendan Molloy -Copyright (c) 2018-2021 UiT The Arctic University of Norway -Copyright (c) 2018-2021 Sámediggi +Copyright (c) 2017-2025 Brendan Molloy +Copyright (c) 2018-2025 UiT The Arctic University of Norway +Copyright (c) 2018-2025 Sámediggi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/divvunspell-bin/Cargo.toml b/cli/Cargo.toml similarity index 80% rename from divvunspell-bin/Cargo.toml rename to cli/Cargo.toml index 28403a3..1d3c437 100644 --- a/divvunspell-bin/Cargo.toml +++ b/cli/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "divvunspell-bin" +name = "divvunspell-cli" description = "Spellchecker for ZHFST/BHFST spellers, with case handling and tokenization support." version = "1.0.0" authors = ["Brendan Molloy "] @@ -14,7 +14,7 @@ path = "src/main.rs" [dependencies] serde = { version = "1.0.116", features = ["derive"] } serde_json = "1.0.57" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = ".." } box-format = { version = "0.3.2", features = ["reader"], default-features = false } pretty_env_logger = "0.5.0" gumdrop = "0.8.0" diff --git a/accuracy/LICENSE b/cli/LICENSE similarity index 100% rename from accuracy/LICENSE rename to cli/LICENSE diff --git a/divvunspell-bin/src/main.rs b/cli/src/main.rs similarity index 100% rename from divvunspell-bin/src/main.rs rename to cli/src/main.rs diff --git a/accuracy/Cargo.toml b/crates/accuracy/Cargo.toml similarity index 84% rename from accuracy/Cargo.toml rename to crates/accuracy/Cargo.toml index 3180bc8..a7643e4 100644 --- a/accuracy/Cargo.toml +++ b/crates/accuracy/Cargo.toml @@ -9,7 +9,7 @@ publish = false [dependencies] serde = { version = "1.0.116", features = ["derive"] } serde_json = "1.0.57" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = "../.." } csv = { version = "1.1" } rayon = { version = "1.4.0" } indicatif = { version = "0.15", features = ["with_rayon"] } diff --git a/divvunspell-bin/LICENSE b/crates/accuracy/LICENSE similarity index 100% rename from divvunspell-bin/LICENSE rename to crates/accuracy/LICENSE diff --git a/accuracy/src/main.rs b/crates/accuracy/src/main.rs similarity index 100% rename from accuracy/src/main.rs rename to crates/accuracy/src/main.rs diff --git a/regtest/Cargo.toml b/crates/regtest/Cargo.toml similarity index 55% rename from regtest/Cargo.toml rename to crates/regtest/Cargo.toml index c64339d..41d8a68 100644 --- a/regtest/Cargo.toml +++ b/crates/regtest/Cargo.toml @@ -6,4 +6,4 @@ edition = "2021" [dependencies] clap = { version = "4.5.32", features = ["derive"] } csv = "1.3.1" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = "../.." } diff --git a/regtest/src/main.rs b/crates/regtest/src/main.rs similarity index 100% rename from regtest/src/main.rs rename to crates/regtest/src/main.rs diff --git a/thfst-tools/Cargo.toml b/crates/thfst-tools/Cargo.toml similarity index 76% rename from thfst-tools/Cargo.toml rename to crates/thfst-tools/Cargo.toml index ee29450..b5c1475 100644 --- a/thfst-tools/Cargo.toml +++ b/crates/thfst-tools/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/divvun/divvunspell" [dependencies] serde_json = "1.0.57" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = "../.." } box-format = "0.3.2" structopt = "0.3.17" tempfile = "3" diff --git a/thfst-tools/LICENSE b/crates/thfst-tools/LICENSE similarity index 100% rename from thfst-tools/LICENSE rename to crates/thfst-tools/LICENSE diff --git a/thfst-tools/src/main.rs b/crates/thfst-tools/src/main.rs similarity index 100% rename from thfst-tools/src/main.rs rename to crates/thfst-tools/src/main.rs diff --git a/divvunspell/Cargo.toml b/divvunspell/Cargo.toml deleted file mode 100644 index bae4123..0000000 --- a/divvunspell/Cargo.toml +++ /dev/null @@ -1,54 +0,0 @@ -[package] -name = "divvunspell" -description = "Spell checking library for ZHFST/BHFST spellers, with case handling and tokenization support." -version = "1.0.0-beta.5" -authors = ["Brendan Molloy "] -edition = "2021" -license = "MIT OR Apache-2.0" -repository = "https://github.com/divvun/divvunspell" - -[lib] -name = "divvunspell" -crate-type = ["rlib", "staticlib", "cdylib"] - -[dependencies] -libc = "0.2" -memmap2 = "0.9.4" -byteorder = "1.3.4" -serde = { version = "1.0.116", features = ["derive"] } -serde_json = "1.0.57" -serde-xml-rs = { version = "0.6.0", default-features = false } -zip = { version = "0.5", default-features = false } -unic-segment = "0.9.0" -unic-char-range = "0.9.0" -unic-char-property = "0.9.0" -unic-ucd-category = "0.9.0" -unic-emoji-char = "0.9.0" -parking_lot = "0.11.2" -hashbrown = { version = "0.11", features = ["serde"] } -lifeguard = "0.6.1" -smol_str = { version = "0.2.1", features = ["serde"] } -box-format = { version = "0.3.2", features = ["reader"], default-features = false } -itertools = "0.12.1" -strsim = "0.11.0" -log = "0.4.11" -cffi = { git = "https://github.com/cffi-rs/cffi", optional = true } -unic-ucd-common = "0.9.0" -flatbuffers = { version = "0.6.1", optional = true } -env_logger = { version = "0.11.2", optional = true } -thiserror = "1.0.20" -tempfile = "3.3.0" -fs_extra = "1.2.0" -eieio = "1.0.0" -pathos = "0.3.0" -language-tags = "0.3.2" -globwalk = "0.9.1" - -[features] -compression = ["zip/deflate"] -logging = ["env_logger"] -cargo-clippy = [] - -# Internal features: unstable, not for external use! -internal_convert = [] -internal_ffi = ["flatbuffers", "logging", "cffi"] diff --git a/divvunspell/LICENSE-APACHE b/divvunspell/LICENSE-APACHE deleted file mode 100644 index 8f71f43..0000000 --- a/divvunspell/LICENSE-APACHE +++ /dev/null @@ -1,202 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/divvunspell/LICENSE-MIT b/divvunspell/LICENSE-MIT deleted file mode 100644 index c5469f7..0000000 --- a/divvunspell/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2017-2021 Brendan Molloy -Copyright (c) 2018-2021 UiT The Arctic University of Norway -Copyright (c) 2018-2021 Sámediggi - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/divvunspell/examples/find-path.rs b/examples/find-path.rs similarity index 100% rename from divvunspell/examples/find-path.rs rename to examples/find-path.rs diff --git a/divvunspell/src/archive/boxf.rs b/src/archive/boxf.rs similarity index 100% rename from divvunspell/src/archive/boxf.rs rename to src/archive/boxf.rs diff --git a/divvunspell/src/archive/error.rs b/src/archive/error.rs similarity index 100% rename from divvunspell/src/archive/error.rs rename to src/archive/error.rs diff --git a/divvunspell/src/archive/meta.rs b/src/archive/meta.rs similarity index 100% rename from divvunspell/src/archive/meta.rs rename to src/archive/meta.rs diff --git a/divvunspell/src/archive/mod.rs b/src/archive/mod.rs similarity index 100% rename from divvunspell/src/archive/mod.rs rename to src/archive/mod.rs diff --git a/divvunspell/src/archive/zip.rs b/src/archive/zip.rs similarity index 100% rename from divvunspell/src/archive/zip.rs rename to src/archive/zip.rs diff --git a/divvunspell/src/constants.rs b/src/constants.rs similarity index 100% rename from divvunspell/src/constants.rs rename to src/constants.rs diff --git a/divvunspell/src/ffi/fbs/mod.rs b/src/ffi/fbs/mod.rs similarity index 100% rename from divvunspell/src/ffi/fbs/mod.rs rename to src/ffi/fbs/mod.rs diff --git a/divvunspell/src/ffi/fbs/tokenizer.rs b/src/ffi/fbs/tokenizer.rs similarity index 100% rename from divvunspell/src/ffi/fbs/tokenizer.rs rename to src/ffi/fbs/tokenizer.rs diff --git a/divvunspell/src/ffi/mod.rs b/src/ffi/mod.rs similarity index 100% rename from divvunspell/src/ffi/mod.rs rename to src/ffi/mod.rs diff --git a/divvunspell/src/lib.rs b/src/lib.rs similarity index 100% rename from divvunspell/src/lib.rs rename to src/lib.rs diff --git a/divvunspell/src/paths.rs b/src/paths.rs similarity index 100% rename from divvunspell/src/paths.rs rename to src/paths.rs diff --git a/divvunspell/src/speller/mod.rs b/src/speller/mod.rs similarity index 100% rename from divvunspell/src/speller/mod.rs rename to src/speller/mod.rs diff --git a/divvunspell/src/speller/suggestion.rs b/src/speller/suggestion.rs similarity index 100% rename from divvunspell/src/speller/suggestion.rs rename to src/speller/suggestion.rs diff --git a/divvunspell/src/speller/worker.rs b/src/speller/worker.rs similarity index 100% rename from divvunspell/src/speller/worker.rs rename to src/speller/worker.rs diff --git a/divvunspell/src/tokenizer/case_handling.rs b/src/tokenizer/case_handling.rs similarity index 100% rename from divvunspell/src/tokenizer/case_handling.rs rename to src/tokenizer/case_handling.rs diff --git a/divvunspell/src/tokenizer/mod.rs b/src/tokenizer/mod.rs similarity index 100% rename from divvunspell/src/tokenizer/mod.rs rename to src/tokenizer/mod.rs diff --git a/divvunspell/src/tokenizer/tables/word_break.rsv b/src/tokenizer/tables/word_break.rsv similarity index 100% rename from divvunspell/src/tokenizer/tables/word_break.rsv rename to src/tokenizer/tables/word_break.rsv diff --git a/divvunspell/src/tokenizer/word.rs b/src/tokenizer/word.rs similarity index 100% rename from divvunspell/src/tokenizer/word.rs rename to src/tokenizer/word.rs diff --git a/divvunspell/src/tokenizer/word_break.rs b/src/tokenizer/word_break.rs similarity index 100% rename from divvunspell/src/tokenizer/word_break.rs rename to src/tokenizer/word_break.rs diff --git a/divvunspell/src/transducer/alphabet.rs b/src/transducer/alphabet.rs similarity index 100% rename from divvunspell/src/transducer/alphabet.rs rename to src/transducer/alphabet.rs diff --git a/divvunspell/src/transducer/convert.rs b/src/transducer/convert.rs similarity index 100% rename from divvunspell/src/transducer/convert.rs rename to src/transducer/convert.rs diff --git a/divvunspell/src/transducer/hfst/alphabet.rs b/src/transducer/hfst/alphabet.rs similarity index 100% rename from divvunspell/src/transducer/hfst/alphabet.rs rename to src/transducer/hfst/alphabet.rs diff --git a/divvunspell/src/transducer/hfst/header.rs b/src/transducer/hfst/header.rs similarity index 100% rename from divvunspell/src/transducer/hfst/header.rs rename to src/transducer/hfst/header.rs diff --git a/divvunspell/src/transducer/hfst/index_table.rs b/src/transducer/hfst/index_table.rs similarity index 100% rename from divvunspell/src/transducer/hfst/index_table.rs rename to src/transducer/hfst/index_table.rs diff --git a/divvunspell/src/transducer/hfst/mod.rs b/src/transducer/hfst/mod.rs similarity index 100% rename from divvunspell/src/transducer/hfst/mod.rs rename to src/transducer/hfst/mod.rs diff --git a/divvunspell/src/transducer/hfst/transition_table.rs b/src/transducer/hfst/transition_table.rs similarity index 100% rename from divvunspell/src/transducer/hfst/transition_table.rs rename to src/transducer/hfst/transition_table.rs diff --git a/divvunspell/src/transducer/mod.rs b/src/transducer/mod.rs similarity index 100% rename from divvunspell/src/transducer/mod.rs rename to src/transducer/mod.rs diff --git a/divvunspell/src/transducer/symbol_transition.rs b/src/transducer/symbol_transition.rs similarity index 100% rename from divvunspell/src/transducer/symbol_transition.rs rename to src/transducer/symbol_transition.rs diff --git a/divvunspell/src/transducer/thfst/chunked.rs b/src/transducer/thfst/chunked.rs similarity index 100% rename from divvunspell/src/transducer/thfst/chunked.rs rename to src/transducer/thfst/chunked.rs diff --git a/divvunspell/src/transducer/thfst/index_table.rs b/src/transducer/thfst/index_table.rs similarity index 100% rename from divvunspell/src/transducer/thfst/index_table.rs rename to src/transducer/thfst/index_table.rs diff --git a/divvunspell/src/transducer/thfst/mod.rs b/src/transducer/thfst/mod.rs similarity index 100% rename from divvunspell/src/transducer/thfst/mod.rs rename to src/transducer/thfst/mod.rs diff --git a/divvunspell/src/transducer/thfst/transition_table.rs b/src/transducer/thfst/transition_table.rs similarity index 100% rename from divvunspell/src/transducer/thfst/transition_table.rs rename to src/transducer/thfst/transition_table.rs diff --git a/divvunspell/src/transducer/tree_node.rs b/src/transducer/tree_node.rs similarity index 100% rename from divvunspell/src/transducer/tree_node.rs rename to src/transducer/tree_node.rs diff --git a/divvunspell/src/types.rs b/src/types.rs similarity index 100% rename from divvunspell/src/types.rs rename to src/types.rs diff --git a/divvunspell/src/vfs.rs b/src/vfs.rs similarity index 100% rename from divvunspell/src/vfs.rs rename to src/vfs.rs From 5bc5dd1775545a3e13bc050591fa052e3de320f5 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 9 Oct 2025 19:15:52 +0200 Subject: [PATCH 20/21] Use newtypes for different number types --- crates/accuracy/src/main.rs | 3 +- src/constants.rs | 4 +- src/speller/mod.rs | 19 +-- src/speller/worker.rs | 116 ++++++++-------- src/transducer/alphabet.rs | 18 ++- src/transducer/convert.rs | 50 ++++--- src/transducer/hfst/alphabet.rs | 22 ++-- src/transducer/hfst/header.rs | 20 +-- src/transducer/hfst/index_table.rs | 47 ++++--- src/transducer/hfst/mod.rs | 32 +++-- src/transducer/hfst/transition_table.rs | 40 +++--- src/transducer/mod.rs | 4 +- src/transducer/symbol_transition.rs | 2 +- src/transducer/thfst/chunked.rs | 23 ++-- src/transducer/thfst/index_table.rs | 36 ++--- src/transducer/thfst/mod.rs | 20 ++- src/transducer/thfst/transition_table.rs | 45 +++---- src/transducer/tree_node.rs | 62 ++++----- src/types.rs | 160 ++++++++++++++++++++++- 19 files changed, 453 insertions(+), 270 deletions(-) diff --git a/crates/accuracy/src/main.rs b/crates/accuracy/src/main.rs index 6fcc3f1..9de7cb4 100644 --- a/crates/accuracy/src/main.rs +++ b/crates/accuracy/src/main.rs @@ -24,6 +24,7 @@ $ cargo run -- --threshold 0.9 typos.txt se.zhfst */ use chrono::prelude::*; +use divvunspell::types::Weight; use std::error::Error; use std::{ io::Write, @@ -42,7 +43,7 @@ use structopt::clap::{App, AppSettings, Arg}; static CFG: SpellerConfig = SpellerConfig { n_best: Some(10), - max_weight: Some(10000.0), + max_weight: Some(Weight(10000.0)), beam: None, reweight: Some(ReweightingConfig::default_const()), node_pool_size: 128, diff --git a/src/constants.rs b/src/constants.rs index acb5b17..365c23f 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -1,6 +1,8 @@ +use crate::types::TransitionTableIndex; + pub(crate) const INDEX_TABLE_SIZE: usize = 6; pub(crate) const TRANS_TABLE_SIZE: usize = 12; -pub(crate) const TARGET_TABLE: u32 = 2_147_483_648; +pub(crate) const TARGET_TABLE: TransitionTableIndex = TransitionTableIndex(2_147_483_648); #[cfg(test)] mod tests { diff --git a/src/speller/mod.rs b/src/speller/mod.rs index 8c6ef93..3d4384a 100644 --- a/src/speller/mod.rs +++ b/src/speller/mod.rs @@ -130,7 +130,7 @@ const fn default_n_best() -> Option { } const fn default_max_weight() -> Option { - Some(10000.0) + Some(Weight(10000.0)) } const fn default_beam() -> Option { @@ -428,8 +428,8 @@ where key_table .iter() .position(|x| x == &s) - .map(|x| x as u16) - .unwrap_or_else(|| alphabet.unknown().unwrap_or(0u16)) + .map(|x| SymbolNumber(x as u16)) + .unwrap_or_else(|| alphabet.unknown().unwrap_or(SymbolNumber::ZERO)) }) .collect() } @@ -463,7 +463,7 @@ where mode, words, } = case; - let mut best: HashMap = HashMap::new(); + let mut best: HashMap = HashMap::new(); for word in std::iter::once(&original_input).chain(words.iter()) { log::trace!("suggesting for word {}", word); @@ -511,11 +511,12 @@ where strsim::damerau_levenshtein(&words[0].as_str(), &word.as_str()) + strsim::damerau_levenshtein(&word.as_str(), sugg.value()); let penalty_middle = reweight.mid_penalty * distance as f32; - let additional_weight = if sugg.value.chars().all(|c| is_emoji(c)) { - 0.0 - } else { - penalty_start + penalty_end + penalty_middle - }; + let additional_weight = + Weight(if sugg.value.chars().all(|c| is_emoji(c)) { + 0.0 + } else { + penalty_start + penalty_end + penalty_middle + }); log::trace!( "Penalty: +{} = {} + {} * {} + {}", additional_weight, diff --git a/src/speller/worker.rs b/src/speller/worker.rs index 282dc19..375a26e 100644 --- a/src/speller/worker.rs +++ b/src/speller/worker.rs @@ -1,6 +1,5 @@ use hashbrown::HashMap; use smol_str::SmolStr; -use std::f32; use std::sync::Arc; use lifeguard::{Pool, Recycled}; @@ -9,11 +8,11 @@ use super::{HfstSpeller, OutputMode, SpellerConfig}; use crate::speller::suggestion::Suggestion; use crate::transducer::tree_node::TreeNode; use crate::transducer::Transducer; -use crate::types::{SymbolNumber, Weight}; +use crate::types::{SymbolNumber, TransitionTableIndex, ValueNumber, Weight}; #[inline(always)] fn speller_start_node(pool: &Pool, size: usize) -> Vec> { - let start_node = TreeNode::empty(pool, vec![0; size]); + let start_node = TreeNode::empty(pool, vec![ValueNumber::ZERO; size]); let mut nodes = Vec::with_capacity(256); nodes.push(start_node); nodes @@ -59,17 +58,19 @@ where let lexicon = self.speller.lexicon(); let operations = lexicon.alphabet().operations(); - if !lexicon.has_epsilons_or_flags(next_node.lexicon_state + 1) { + if !lexicon.has_epsilons_or_flags(next_node.lexicon_state.incr()) { return; } - let mut next = lexicon.next(next_node.lexicon_state, 0).unwrap(); + let mut next = lexicon + .next(next_node.lexicon_state, SymbolNumber::ZERO) + .unwrap(); while let Some(transition) = lexicon.take_epsilons_and_flags(next) { if let Some(sym) = lexicon.transition_input_symbol(next) { let transition_weight = transition.weight().unwrap(); - if sym == 0 { + if sym == SymbolNumber::ZERO { if self .is_under_weight_limit(max_weight, next_node.weight() + transition_weight) { @@ -85,7 +86,7 @@ where if let Some(op) = operation { if !self.is_under_weight_limit(max_weight, transition_weight) { - next += 1; + next = next.incr(); continue; } @@ -97,7 +98,7 @@ where } } - next += 1; + next = next.incr(); } } @@ -113,14 +114,16 @@ where let lexicon = self.speller.lexicon(); let alphabet_translator = self.speller.alphabet_translator(); - if !mutator.has_transitions(next_node.mutator_state + 1, Some(0)) { + if !mutator.has_transitions(next_node.mutator_state.incr(), Some(SymbolNumber::ZERO)) { return; } - let mut next_m = mutator.next(next_node.mutator_state, 0).unwrap(); + let mut next_m = mutator + .next(next_node.mutator_state, SymbolNumber::ZERO) + .unwrap(); while let Some(transition) = mutator.take_epsilons(next_m) { - if let Some(0) = transition.symbol() { + if let Some(SymbolNumber::ZERO) = transition.symbol() { if self.is_under_weight_limit( max_weight, next_node.weight() + transition.weight().unwrap(), @@ -129,20 +132,20 @@ where output_nodes.push(new_node); } - next_m += 1; + next_m = next_m.incr(); continue; } if let Some(sym) = transition.symbol() { - let trans_sym = alphabet_translator[sym as usize]; + let trans_sym = alphabet_translator[sym.0 as usize]; - if !lexicon.has_transitions(next_node.lexicon_state + 1, Some(trans_sym)) { + if !lexicon.has_transitions(next_node.lexicon_state.incr(), Some(trans_sym)) { // we have no regular transitions for this if trans_sym >= lexicon.alphabet().initial_symbol_count() { // this input was not originally in the alphabet, so unknown or identity // may apply if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().unknown(), ) { self.queue_lexicon_arcs( @@ -158,7 +161,7 @@ where } if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().identity(), ) { self.queue_lexicon_arcs( @@ -174,7 +177,7 @@ where } } - next_m += 1; + next_m = next_m.incr(); continue; } @@ -190,7 +193,7 @@ where ); } - next_m += 1; + next_m = next_m.incr(); } } @@ -201,7 +204,7 @@ where max_weight: Weight, next_node: &TreeNode, input_sym: SymbolNumber, - mutator_state: u32, + mutator_state: TransitionTableIndex, mutator_weight: Weight, input_increment: i16, output_nodes: &mut Vec>, @@ -217,7 +220,7 @@ where // Symbol replacement here is unfortunate but necessary. if let Some(id) = identity { if sym == id { - sym = self.input[next_node.input_state as usize]; + sym = self.input[next_node.input_state.0 as usize]; } } @@ -231,7 +234,7 @@ where OutputMode::WithoutTags => next_node.update( pool, input_sym, - Some(next_node.input_state + input_increment as u32), + Some(next_node.input_state.incr(input_increment as u32)), mutator_state, noneps_trans.target().unwrap(), noneps_trans.weight().unwrap() + mutator_weight, @@ -239,7 +242,7 @@ where OutputMode::WithTags => next_node.update( pool, sym, - Some(next_node.input_state + input_increment as u32), + Some(next_node.input_state.incr(input_increment as u32)), mutator_state, noneps_trans.target().unwrap(), noneps_trans.weight().unwrap() + mutator_weight, @@ -249,7 +252,7 @@ where } } - next += 1; + next = next.incr(); } } @@ -271,13 +274,13 @@ where while let Some(transition) = mutator.take_non_epsilons(next_m, input_sym) { let symbol = transition.symbol(); - if let Some(0) = symbol { + if let Some(SymbolNumber::ZERO) = symbol { let transition_weight = transition.weight().unwrap(); if self.is_under_weight_limit(max_weight, next_node.weight() + transition_weight) { let new_node = next_node.update( pool, - 0, - Some(next_node.input_state + 1), + SymbolNumber::ZERO, + Some(next_node.input_state.incr(1)), transition.target().unwrap(), next_node.lexicon_state, transition_weight, @@ -286,17 +289,17 @@ where output_nodes.push(new_node); } - next_m += 1; + next_m = next_m.incr(); continue; } if let Some(sym) = symbol { - let trans_sym = alphabet_translator[sym as usize]; + let trans_sym = alphabet_translator[sym.0 as usize]; - if !lexicon.has_transitions(next_node.lexicon_state + 1, Some(trans_sym)) { + if !lexicon.has_transitions(next_node.lexicon_state.incr(), Some(trans_sym)) { if trans_sym >= lexicon.alphabet().initial_symbol_count() { if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().unknown(), ) { self.queue_lexicon_arcs( @@ -311,7 +314,7 @@ where ); } if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().identity(), ) { self.queue_lexicon_arcs( @@ -326,7 +329,7 @@ where ); } } - next_m += 1; + next_m = next_m.incr(); continue; } @@ -341,7 +344,7 @@ where output_nodes, ); - next_m += 1; + next_m = next_m.incr(); } } } @@ -355,7 +358,7 @@ where output_nodes: &mut Vec>, ) { let mutator = self.speller.mutator(); - let input_state = next_node.input_state as usize; + let input_state = next_node.input_state.0 as usize; if input_state >= self.input.len() { return; @@ -363,12 +366,13 @@ where let input_sym = self.input[input_state]; - if !mutator.has_transitions(next_node.mutator_state + 1, Some(input_sym)) { + if !mutator.has_transitions(next_node.mutator_state.incr(), Some(input_sym)) { // we have no regular transitions for this if input_sym >= mutator.alphabet().initial_symbol_count() { - if mutator - .has_transitions(next_node.mutator_state + 1, mutator.alphabet().identity()) - { + if mutator.has_transitions( + next_node.mutator_state.incr(), + mutator.alphabet().identity(), + ) { self.queue_mutator_arcs( pool, max_weight, @@ -380,7 +384,7 @@ where // Check for unknown transition if mutator - .has_transitions(next_node.mutator_state + 1, mutator.alphabet().unknown()) + .has_transitions(next_node.mutator_state.incr(), mutator.alphabet().unknown()) { self.queue_mutator_arcs( pool, @@ -407,14 +411,14 @@ where let mutator = self.speller.mutator(); let lexicon = self.speller.lexicon(); let alphabet_translator = self.speller.alphabet_translator(); - let input_state = next_node.input_state as usize; + let input_state = next_node.input_state.0 as usize; if input_state >= self.input.len() { return; } - let input_sym = alphabet_translator[self.input[input_state as usize] as usize]; - let next_lexicon_state = next_node.lexicon_state + 1; + let input_sym = alphabet_translator[self.input[input_state as usize].0 as usize]; + let next_lexicon_state = next_node.lexicon_state.incr(); // log::trace!( // "lexicon consuming {}: {}", // input_sym, @@ -435,7 +439,7 @@ where &next_node, identity.unwrap(), next_node.mutator_state, - 0.0, + Weight::ZERO, 1, output_nodes, ); @@ -449,7 +453,7 @@ where &next_node, unknown.unwrap(), next_node.mutator_state, - 0.0, + Weight::ZERO, 1, output_nodes, ); @@ -465,7 +469,7 @@ where &next_node, input_sym, next_node.mutator_state, - 0.0, + Weight::ZERO, 1, output_nodes, ); @@ -476,7 +480,7 @@ where use std::cmp::Ordering::{Equal, Less}; let c = &self.config; - let mut max_weight = c.max_weight.unwrap_or(f32::MAX); + let mut max_weight = c.max_weight.unwrap_or(Weight::MAX); if let Some(beam) = c.beam { let candidate_weight = best_weight + beam; @@ -503,7 +507,7 @@ where #[inline(always)] fn state_size(&self) -> usize { - self.speller.lexicon().alphabet().state_size() as usize + self.speller.lexicon().alphabet().state_size().0 as usize } pub(crate) fn is_correct(&self) -> bool { @@ -513,14 +517,14 @@ where let mut nodes = speller_start_node(&pool, self.state_size() as usize); log::trace!("beginning is_correct {:?}?", self.input); while let Some(next_node) = nodes.pop() { - if next_node.input_state as usize == self.input.len() + if next_node.input_state.0 as usize == self.input.len() && self.speller.lexicon().is_final(next_node.lexicon_state) { return true; } - self.lexicon_epsilons(&pool, f32::INFINITY, &next_node, &mut nodes); - self.lexicon_consume(&pool, f32::INFINITY, &next_node, &mut nodes); + self.lexicon_epsilons(&pool, Weight::INFINITE, &next_node, &mut nodes); + self.lexicon_consume(&pool, Weight::INFINITE, &next_node, &mut nodes); } false @@ -534,7 +538,7 @@ where let mut lookups = HashMap::new(); let mut analyses: Vec = vec![]; while let Some(next_node) = nodes.pop() { - if next_node.input_state as usize == self.input.len() + if next_node.input_state.0 as usize == self.input.len() && self.speller.lexicon().is_final(next_node.lexicon_state) { let string = self @@ -553,8 +557,8 @@ where *entry = weight; } } - self.lexicon_epsilons(&pool, f32::INFINITY, &next_node, &mut nodes); - self.lexicon_consume(&pool, f32::INFINITY, &next_node, &mut nodes); + self.lexicon_epsilons(&pool, Weight::INFINITE, &next_node, &mut nodes); + self.lexicon_consume(&pool, Weight::INFINITE, &next_node, &mut nodes); analyses = self.generate_sorted_suggestions(&lookups); } analyses @@ -567,7 +571,7 @@ where let mut nodes = speller_start_node(&pool, self.state_size() as usize); let mut corrections = HashMap::new(); let mut suggestions: Vec = vec![]; - let mut best_weight = self.config.max_weight.unwrap_or(f32::MAX); + let mut best_weight = self.config.max_weight.unwrap_or(Weight::MAX); let key_table = self.speller.mutator().alphabet().key_table(); let mut iteration_count = 0usize; @@ -581,7 +585,7 @@ where let name: SmolStr = self .input .iter() - .map(|s| &*key_table[*s as usize]) + .map(|s| &*key_table[s.0 as usize]) .collect(); log::warn!("{}: iteration count at {}", name, iteration_count); log::warn!("Node count: {}", nodes.len()); @@ -596,7 +600,7 @@ where self.lexicon_epsilons(&pool, max_weight, &next_node, &mut nodes); self.mutator_epsilons(&pool, max_weight, &next_node, &mut nodes); - if next_node.input_state as usize != self.input.len() { + if next_node.input_state.0 as usize != self.input.len() { self.consume_input(&pool, max_weight, &next_node, &mut nodes); continue; } diff --git a/src/transducer/alphabet.rs b/src/transducer/alphabet.rs index 6090425..192309b 100644 --- a/src/transducer/alphabet.rs +++ b/src/transducer/alphabet.rs @@ -20,7 +20,9 @@ pub struct TransducerAlphabet { impl TransducerAlphabet { #[inline(always)] pub fn string_from_symbols(&self, syms: &[SymbolNumber]) -> SmolStr { - syms.iter().map(|s| &*self.key_table[*s as usize]).collect() + syms.iter() + .map(|s| &*self.key_table[s.0 as usize]) + .collect() } #[inline(always)] @@ -50,8 +52,10 @@ impl TransducerAlphabet { #[inline(always)] pub fn add_symbol(&mut self, string: &str) { - self.string_to_symbol - .insert(string.into(), self.key_table.len() as u16); + self.string_to_symbol.insert( + string.into(), + SymbolNumber(self.key_table.len().try_into().expect("too many symbols")), + ); self.key_table.push(string.into()); } @@ -91,14 +95,14 @@ impl TransducerAlphabet { let from_keys = from.key_table(); let mut translator = Vec::with_capacity(64); - translator.push(0); + translator.push(SymbolNumber::ZERO); for from_sym in from_keys.iter().skip(1) { log::trace!("key {}", from_sym); - if let Some(&sym) = self.string_to_symbol.get(from_sym) { - translator.push(sym); + if let Some(sym) = self.string_to_symbol.get(from_sym) { + translator.push(*sym); } else { - let lexicon_key = self.key_table.len() as SymbolNumber; + let lexicon_key = SymbolNumber(self.key_table.len() as u16); translator.push(lexicon_key); self.add_symbol(from_sym); } diff --git a/src/transducer/convert.rs b/src/transducer/convert.rs index 50baa1c..5e26ecd 100644 --- a/src/transducer/convert.rs +++ b/src/transducer/convert.rs @@ -7,6 +7,7 @@ use byteorder::{LittleEndian, WriteBytesExt}; use super::hfst; use super::thfst; use crate::transducer::Transducer; +use crate::types::{SymbolNumber, TransitionTableIndex}; pub trait ConvertFile { fn convert_file(transducer: &T, path: &Path) -> Result<(), std::io::Error>; @@ -48,20 +49,17 @@ impl ConvertFrom for thfst::MemmapIndexTable Result<(), std::io::Error> { - use std::{u16, u32}; - - // eprintln!( - // "size: {}, len: {}, offset: {}", - // table.size, table.len, table.offset - // ); - - for index in 0..table.size { - let input_symbol = table.input_symbol(index).unwrap_or(u16::MAX); - let targetish = table.target(index).unwrap_or(u32::MAX); - - writer.write_u16::(input_symbol).unwrap(); + for index in 0..table.size.0 { + let input_symbol = table + .input_symbol(TransitionTableIndex(index)) + .unwrap_or(SymbolNumber::MAX); + let targetish = table + .target(TransitionTableIndex(index)) + .unwrap_or(TransitionTableIndex::MAX); + + writer.write_u16::(input_symbol.0).unwrap(); writer.write_u16::(0).unwrap(); - writer.write_u32::(targetish).unwrap(); + writer.write_u32::(targetish.0).unwrap(); } Ok(()) @@ -73,23 +71,19 @@ impl ConvertFrom for thfst::MemmapTransitionTable Result<(), std::io::Error> { - use std::{u16, u32}; - - // eprintln!( - // "size: {}, len: {}, offset: {}", - // table.size, table.len, table.offset - // ); - - for index in 0..table.size { - let input_symbol = table.input_symbol(index).unwrap_or(u16::MAX); - let output_symbol = table.output_symbol(index).unwrap_or(u16::MAX); - let target = table.target(index).unwrap_or(u32::MAX); + for index in 0..table.size.0 { + let index = TransitionTableIndex(index); + let input_symbol = table.input_symbol(index).unwrap_or(SymbolNumber::MAX); + let output_symbol = table.output_symbol(index).unwrap_or(SymbolNumber::MAX); + let target = table.target(index).unwrap_or(TransitionTableIndex::MAX); let weight = table.weight(index).unwrap(); - writer.write_u16::(input_symbol).unwrap(); - writer.write_u16::(output_symbol).unwrap(); - writer.write_u32::(target).unwrap(); - writer.write_u32::(weight.to_bits()).unwrap(); + writer.write_u16::(input_symbol.0).unwrap(); + writer.write_u16::(output_symbol.0).unwrap(); + writer.write_u32::(target.0).unwrap(); + writer + .write_u32::(weight.0.to_bits()) + .unwrap(); } Ok(()) diff --git a/src/transducer/hfst/alphabet.rs b/src/transducer/hfst/alphabet.rs index 306394b..e555186 100644 --- a/src/transducer/hfst/alphabet.rs +++ b/src/transducer/hfst/alphabet.rs @@ -25,14 +25,14 @@ impl std::default::Default for TransducerAlphabetParser { fn default() -> Self { TransducerAlphabetParser { key_table: Vec::with_capacity(64), - flag_state_size: 0, + flag_state_size: SymbolNumber::ZERO, length: 0, string_to_symbol: HashMap::new(), operations: HashMap::new(), feature_bucket: HashMap::new(), value_bucket: HashMap::new(), - val_n: 0i16, - feat_n: 0u16, + val_n: ValueNumber::ZERO, + feat_n: SymbolNumber::ZERO, identity_symbol: None, unknown_symbol: None, } @@ -64,12 +64,12 @@ impl TransducerAlphabetParser { if !self.feature_bucket.contains_key(&feature) { self.feature_bucket.insert(feature.clone(), self.feat_n); - self.feat_n += 1; + self.feat_n = self.feat_n.incr(); } if !self.value_bucket.contains_key(&value) { self.value_bucket.insert(value.clone(), self.val_n); - self.val_n += 1; + self.val_n = self.val_n.incr(); } let op = FlagDiacriticOperation { @@ -85,7 +85,8 @@ impl TransducerAlphabetParser { fn parse_inner(&mut self, buf: &[u8], symbols: SymbolNumber) { let mut offset = 0usize; - for i in 0..symbols { + for i in 0..symbols.0 { + let i = SymbolNumber(i); let mut end = 0usize; while buf[offset + end] != 0 { @@ -100,7 +101,7 @@ impl TransducerAlphabetParser { } else if key == "@_EPSILON_SYMBOL_@" { self.value_bucket.insert("".into(), self.val_n); self.key_table.push("".into()); - self.val_n += 1; + self.val_n = self.val_n.incr(); } else if key == "@_IDENTITY_SYMBOL_@" { self.identity_symbol = Some(i); self.key_table.push(key); @@ -120,7 +121,12 @@ impl TransducerAlphabetParser { offset += end + 1; } - self.flag_state_size = self.feature_bucket.len() as SymbolNumber; + self.flag_state_size = SymbolNumber( + self.feature_bucket + .len() + .try_into() + .expect("Too many features in the alphabet, cannot fit into SymbolNumber"), + ); // Count remaining null padding bytes while buf[offset] == b'\0' { diff --git a/src/transducer/hfst/header.rs b/src/transducer/hfst/header.rs index 085025e..a735c8b 100644 --- a/src/transducer/hfst/header.rs +++ b/src/transducer/hfst/header.rs @@ -7,8 +7,8 @@ use crate::types::{HeaderFlag, SymbolNumber, TransitionTableIndex}; pub struct TransducerHeader { symbols: SymbolNumber, input_symbols: SymbolNumber, - trans_index_table: usize, - trans_target_table: usize, + trans_index_table: TransitionTableIndex, + trans_target_table: TransitionTableIndex, states: TransitionTableIndex, transitions: TransitionTableIndex, @@ -31,12 +31,12 @@ impl TransducerHeader { let pos = rdr.position() + u64::from(header_len); rdr.set_position(pos); - let input_symbols = rdr.read_u16::().unwrap(); - let symbols = rdr.read_u16::().unwrap(); - let trans_index_table = rdr.read_u32::().unwrap() as usize; - let trans_target_table = rdr.read_u32::().unwrap() as usize; - let states = rdr.read_u32::().unwrap(); - let transitions = rdr.read_u32::().unwrap(); + let input_symbols = SymbolNumber(rdr.read_u16::().unwrap()); + let symbols = SymbolNumber(rdr.read_u16::().unwrap()); + let trans_index_table = TransitionTableIndex(rdr.read_u32::().unwrap()); + let trans_target_table = TransitionTableIndex(rdr.read_u32::().unwrap()); + let states = TransitionTableIndex(rdr.read_u32::().unwrap()); + let transitions = TransitionTableIndex(rdr.read_u32::().unwrap()); let mut props = [false; 9]; @@ -65,11 +65,11 @@ impl TransducerHeader { self.input_symbols } - pub fn index_table_size(&self) -> usize { + pub fn index_table_size(&self) -> TransitionTableIndex { self.trans_index_table } - pub fn target_table_size(&self) -> usize { + pub fn target_table_size(&self) -> TransitionTableIndex { self.trans_target_table } diff --git a/src/transducer/hfst/index_table.rs b/src/transducer/hfst/index_table.rs index 2d50db2..013f46f 100644 --- a/src/transducer/hfst/index_table.rs +++ b/src/transducer/hfst/index_table.rs @@ -6,7 +6,6 @@ use std::fmt; use std::io::Cursor; use std::mem; use std::ptr; -use std::{u16, u32}; use crate::constants::INDEX_TABLE_SIZE; use crate::types::{SymbolNumber, TransitionTableIndex, Weight}; @@ -59,18 +58,17 @@ impl MappedIndexTable { return None; } - let index = self.offset + INDEX_TABLE_SIZE * i as usize; + let index = self.offset + INDEX_TABLE_SIZE * i.0 as usize; - let input_symbol: SymbolNumber = - if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { - let mut cursor = self.make_cursor(); - cursor.set_position(index as u64); - cursor.read_u16::().unwrap() - } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } - }; + let input_symbol = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { + let mut cursor = self.make_cursor(); + cursor.set_position(index as u64); + SymbolNumber(cursor.read_u16::().unwrap()) + } else { + SymbolNumber(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) + }; - if input_symbol == u16::MAX { + if input_symbol == SymbolNumber::MAX { None } else { Some(input_symbol) @@ -83,17 +81,18 @@ impl MappedIndexTable { return None; } - let index = self.offset + INDEX_TABLE_SIZE * i as usize; - let target: TransitionTableIndex = - if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { - let mut cursor = self.make_cursor(); - cursor.set_position((index + mem::size_of::()) as u64); - cursor.read_u32::().unwrap() - } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index + 2) as *const _) } - }; - - if target == u32::MAX { + let index = self.offset + INDEX_TABLE_SIZE * i.0 as usize; + let target = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { + let mut cursor = self.make_cursor(); + cursor.set_position((index + mem::size_of::()) as u64); + TransitionTableIndex(cursor.read_u32::().unwrap()) + } else { + TransitionTableIndex(unsafe { + ptr::read(self.mmap.as_ptr().add(index + 2) as *const _) + }) + }; + + if target == TransitionTableIndex::MAX { None } else { Some(target) @@ -108,11 +107,11 @@ impl MappedIndexTable { return None; } - let index = self.offset + INDEX_TABLE_SIZE * i as usize; + let index = self.offset + INDEX_TABLE_SIZE * i.0 as usize; let weight: Weight = { let mut cursor = self.make_cursor(); cursor.set_position((index + mem::size_of::()) as u64); - cursor.read_f32::().unwrap() + Weight(cursor.read_f32::().unwrap()) }; Some(weight) diff --git a/src/transducer/hfst/mod.rs b/src/transducer/hfst/mod.rs index 2dba15a..ee33cd2 100644 --- a/src/transducer/hfst/mod.rs +++ b/src/transducer/hfst/mod.rs @@ -55,19 +55,17 @@ impl HfstTransducer { let index_table_offset = alphabet_offset + alphabet.len(); - let index_table_end = index_table_offset + INDEX_TABLE_SIZE * header.index_table_size(); + let index_table_end = + index_table_offset + INDEX_TABLE_SIZE * header.index_table_size().0 as usize; let index_table = MappedIndexTable::new( buf.clone(), index_table_offset, index_table_end, - header.index_table_size() as u32, + header.index_table_size(), ); - let trans_table = MappedTransitionTable::new( - buf.clone(), - index_table_end, - header.target_table_size() as u32, - ); + let trans_table = + MappedTransitionTable::new(buf.clone(), index_table_end, header.target_table_size()); HfstTransducer { buf, @@ -139,7 +137,10 @@ impl Transducer for HfstTransducer { None => false, } } else { - match self.index_table.input_symbol(i + u32::from(sym)) { + match self + .index_table + .input_symbol(i + TransitionTableIndex(sym.0 as u32)) + { Some(res) => sym == res, None => false, } @@ -150,10 +151,10 @@ impl Transducer for HfstTransducer { fn has_epsilons_or_flags(&self, i: TransitionTableIndex) -> bool { if i >= TARGET_TABLE { match self.transition_table.input_symbol(i - TARGET_TABLE) { - Some(sym) => sym == 0 || self.alphabet.is_flag(sym), + Some(sym) => sym == SymbolNumber::ZERO || self.alphabet.is_flag(sym), None => false, } - } else if let Some(0) = self.index_table.input_symbol(i) { + } else if let Some(SymbolNumber::ZERO) = self.index_table.input_symbol(i) { true } else { false @@ -162,7 +163,7 @@ impl Transducer for HfstTransducer { #[inline(always)] fn take_epsilons(&self, i: TransitionTableIndex) -> Option { - if let Some(0) = self.transition_table.input_symbol(i) { + if let Some(SymbolNumber::ZERO) = self.transition_table.input_symbol(i) { Some(self.transition_table.symbol_transition(i)) } else { None @@ -172,7 +173,7 @@ impl Transducer for HfstTransducer { #[inline(always)] fn take_epsilons_and_flags(&self, i: TransitionTableIndex) -> Option { if let Some(sym) = self.transition_table.input_symbol(i) { - if sym != 0 && !self.alphabet.is_flag(sym) { + if sym != SymbolNumber::ZERO && !self.alphabet.is_flag(sym) { None } else { Some(self.transition_table.symbol_transition(i)) @@ -202,8 +203,11 @@ impl Transducer for HfstTransducer { #[inline(always)] fn next(&self, i: TransitionTableIndex, symbol: SymbolNumber) -> Option { if i >= TARGET_TABLE { - Some(i - TARGET_TABLE + 1) - } else if let Some(v) = self.index_table.target(i + 1 + u32::from(symbol)) { + Some(i - TARGET_TABLE + TransitionTableIndex::ONE) + } else if let Some(v) = self + .index_table + .target(i + TransitionTableIndex(symbol.0 as u32 + 1)) + { Some(v - TARGET_TABLE) } else { None diff --git a/src/transducer/hfst/transition_table.rs b/src/transducer/hfst/transition_table.rs index bd9431e..a1cc909 100644 --- a/src/transducer/hfst/transition_table.rs +++ b/src/transducer/hfst/transition_table.rs @@ -5,9 +5,9 @@ use byteorder::{LittleEndian, ReadBytesExt}; use memmap2::Mmap; use std::fmt; use std::io::Cursor; +use std::mem; use std::ptr; use std::sync::Arc; -use std::{mem, u16, u32}; use crate::constants::TRANS_TABLE_SIZE; use crate::transducer::symbol_transition::SymbolTransition; @@ -28,7 +28,11 @@ impl fmt::Debug for MappedTransitionTable { impl MappedTransitionTable { #[inline(always)] - pub fn new(mmap: Arc, offset: usize, size: u32) -> MappedTransitionTable { + pub fn new( + mmap: Arc, + offset: usize, + size: TransitionTableIndex, + ) -> MappedTransitionTable { MappedTransitionTable { size, mmap, offset } } @@ -40,14 +44,14 @@ impl MappedTransitionTable { #[inline(always)] fn read_symbol_from_cursor(&self, index: usize) -> Option { let index = self.offset + index; - let x: SymbolNumber = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { + let x = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { let mut cursor = self.make_cursor(); cursor.set_position(index as u64); - cursor.read_u16::().unwrap() + SymbolNumber(cursor.read_u16::().unwrap()) } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } + SymbolNumber(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) }; - if x == u16::MAX { + if x == SymbolNumber::MAX { None } else { Some(x) @@ -60,7 +64,7 @@ impl MappedTransitionTable { return None; } - let index = TRANS_TABLE_SIZE as usize * i as usize; + let index = TRANS_TABLE_SIZE as usize * i.0 as usize; self.read_symbol_from_cursor(index) } @@ -70,7 +74,7 @@ impl MappedTransitionTable { return None; } - let index = ((TRANS_TABLE_SIZE * i as usize) + mem::size_of::()) as usize; + let index = ((TRANS_TABLE_SIZE * i.0 as usize) + mem::size_of::()) as usize; self.read_symbol_from_cursor(index) } @@ -80,18 +84,18 @@ impl MappedTransitionTable { return None; } - let index = - self.offset + ((TRANS_TABLE_SIZE * i as usize) + (2 * mem::size_of::())); + let index = self.offset + + ((TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::())); let x: TransitionTableIndex = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { let mut cursor = self.make_cursor(); cursor.set_position(index as u64); - cursor.read_u32::().unwrap() + TransitionTableIndex(cursor.read_u32::().unwrap()) } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } + TransitionTableIndex(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) }; - if x == u32::MAX { + if x == TransitionTableIndex::MAX { None } else { Some(x) @@ -105,23 +109,25 @@ impl MappedTransitionTable { } let index = self.offset - + ((TRANS_TABLE_SIZE * i as usize) + + ((TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()) + mem::size_of::()); let x: Weight = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { let mut cursor = self.make_cursor(); cursor.set_position(index as u64); - cursor.read_f32::().unwrap() + Weight(cursor.read_f32::().unwrap()) } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } + Weight(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) }; Some(x) } #[inline(always)] pub fn is_final(&self, i: TransitionTableIndex) -> bool { - self.input_symbol(i) == None && self.output_symbol(i) == None && self.target(i) == Some(1) + self.input_symbol(i) == None + && self.output_symbol(i) == None + && self.target(i) == Some(TransitionTableIndex::ONE) } #[inline(always)] diff --git a/src/transducer/mod.rs b/src/transducer/mod.rs index 4e01e85..2480daf 100644 --- a/src/transducer/mod.rs +++ b/src/transducer/mod.rs @@ -108,7 +108,9 @@ pub trait TransitionTable: Sized { /// check if the state is a final state. #[inline(always)] fn is_final(&self, i: TransitionTableIndex) -> bool { - self.input_symbol(i) == None && self.output_symbol(i) == None && self.target(i) == Some(1) + self.input_symbol(i) == None + && self.output_symbol(i) == None + && self.target(i) == Some(TransitionTableIndex(1)) } /// ??? diff --git a/src/transducer/symbol_transition.rs b/src/transducer/symbol_transition.rs index 46c2ab8..5cacd40 100644 --- a/src/transducer/symbol_transition.rs +++ b/src/transducer/symbol_transition.rs @@ -45,7 +45,7 @@ impl SymbolTransition { pub fn clone_with_epsilon_symbol(&self) -> SymbolTransition { SymbolTransition { target: self.target, - symbol: Some(0), + symbol: Some(SymbolNumber(0)), weight: self.weight, } } diff --git a/src/transducer/thfst/chunked.rs b/src/transducer/thfst/chunked.rs index 48a0283..efd5967 100644 --- a/src/transducer/thfst/chunked.rs +++ b/src/transducer/thfst/chunked.rs @@ -19,9 +19,9 @@ where { // meta: MetaRecord, index_tables: Vec>, - indexes_per_chunk: u32, + indexes_per_chunk: TransitionTableIndex, transition_tables: Vec>, - transitions_per_chunk: u32, + transitions_per_chunk: TransitionTableIndex, alphabet: TransducerAlphabet, _file: std::marker::PhantomData, } @@ -32,7 +32,7 @@ macro_rules! transition_rel_index { ($self:expr, $x:expr) => {{ let index_page = $x / $self.transitions_per_chunk; let relative_index = $x - ($self.transitions_per_chunk * index_page); - (index_page as usize, relative_index) + (index_page.0 as usize, relative_index) }}; } @@ -40,7 +40,7 @@ macro_rules! index_rel_index { ($self:expr, $x:expr) => {{ let index_page = $x / $self.indexes_per_chunk; let relative_index = $x - ($self.indexes_per_chunk * index_page); - (index_page as usize, relative_index) + (index_page.0 as usize, relative_index) }}; } @@ -201,7 +201,7 @@ impl Transducer for ThfstChunkedTransducer { } } else { log::trace!("has_transitions: i:{} s:{:?}", i, s); - let (page, index) = index_rel_index!(self, i + u32::from(sym)); + let (page, index) = index_rel_index!(self, i + TransitionTableIndex(sym.0 as u32)); log::trace!("has_transitions: page:{} index:{:?}", page, index); if page >= self.index_tables.len() { return false; @@ -218,12 +218,12 @@ impl Transducer for ThfstChunkedTransducer { if i >= TARGET_TABLE { let (page, index) = transition_rel_index!(self, i - TARGET_TABLE); match self.transition_tables[page].input_symbol(index) { - Some(sym) => sym == 0 || self.alphabet.is_flag(sym), + Some(sym) => sym == SymbolNumber::ZERO || self.alphabet.is_flag(sym), None => false, } } else { let (page, index) = index_rel_index!(self, i); - if let Some(0) = self.index_tables[page].input_symbol(index) { + if let Some(SymbolNumber::ZERO) = self.index_tables[page].input_symbol(index) { true } else { false @@ -235,7 +235,7 @@ impl Transducer for ThfstChunkedTransducer { fn take_epsilons(&self, i: TransitionTableIndex) -> Option { let (page, index) = transition_rel_index!(self, i); - if let Some(0) = self.transition_tables[page].input_symbol(index) { + if let Some(SymbolNumber::ZERO) = self.transition_tables[page].input_symbol(index) { Some(self.transition_tables[page].symbol_transition(index)) } else { None @@ -247,7 +247,7 @@ impl Transducer for ThfstChunkedTransducer { let (page, index) = transition_rel_index!(self, i); if let Some(sym) = self.transition_tables[page].input_symbol(index) { - if sym != 0 && !self.alphabet.is_flag(sym) { + if sym != SymbolNumber::ZERO && !self.alphabet.is_flag(sym) { None } else { Some(self.transition_tables[page].symbol_transition(index)) @@ -278,9 +278,10 @@ impl Transducer for ThfstChunkedTransducer { #[inline(always)] fn next(&self, i: TransitionTableIndex, symbol: SymbolNumber) -> Option { if i >= TARGET_TABLE { - Some(i - TARGET_TABLE + 1) + Some(i - TARGET_TABLE + TransitionTableIndex(1)) } else { - let (page, index) = index_rel_index!(self, i + 1 + u32::from(symbol)); + let (page, index) = + index_rel_index!(self, i + TransitionTableIndex(symbol.0 as u32 + 1)); if let Some(v) = self.index_tables[page].target(index) { Some(v - TARGET_TABLE) diff --git a/src/transducer/thfst/index_table.rs b/src/transducer/thfst/index_table.rs index add31c4..be90dd9 100644 --- a/src/transducer/thfst/index_table.rs +++ b/src/transducer/thfst/index_table.rs @@ -9,7 +9,7 @@ use crate::vfs::{self, Filesystem}; #[derive(Debug)] pub struct MemmapIndexTable { buf: Mmap, - pub(crate) size: u32, + pub(crate) size: TransitionTableIndex, _file: std::marker::PhantomData, } @@ -32,7 +32,7 @@ impl MemmapIndexTable { file.partial_memory_map(chunk * len, len as usize) .map_err(TransducerError::Memmap)? }; - let size = (buf.len() / INDEX_TABLE_SIZE) as u32; + let size = TransitionTableIndex((buf.len() / INDEX_TABLE_SIZE) as u32); Ok(MemmapIndexTable { buf, size, @@ -49,7 +49,7 @@ impl crate::transducer::IndexTable for MemmapIndexTable { { let file = fs.open_file(path).map_err(TransducerError::Io)?; let buf = unsafe { file.memory_map().map_err(TransducerError::Memmap)? }; - let size = (buf.len() / INDEX_TABLE_SIZE) as u32; + let size = TransitionTableIndex((buf.len() / INDEX_TABLE_SIZE) as u32); Ok(MemmapIndexTable { buf, size, @@ -62,12 +62,12 @@ impl crate::transducer::IndexTable for MemmapIndexTable { return None; } - let index = INDEX_TABLE_SIZE * i as usize; + let index = INDEX_TABLE_SIZE * i.0 as usize; let input_symbol: SymbolNumber = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if input_symbol == std::u16::MAX { + if input_symbol == SymbolNumber::MAX { None } else { Some(input_symbol) @@ -79,11 +79,11 @@ impl crate::transducer::IndexTable for MemmapIndexTable { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; let target: TransitionTableIndex = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if target == std::u32::MAX { + if target == TransitionTableIndex::MAX { None } else { Some(target) @@ -95,7 +95,7 @@ impl crate::transducer::IndexTable for MemmapIndexTable { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; let weight: Weight = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; Some(weight) @@ -113,7 +113,7 @@ mod unix { pub struct FileIndexTable { file: F, - size: u32, + size: TransitionTableIndex, } impl FileIndexTable { @@ -144,7 +144,7 @@ mod unix { { let file = fs.open_file(path).map_err(TransducerError::Io)?; Ok(FileIndexTable { - size: file.len().map_err(TransducerError::Io)? as u32, + size: TransitionTableIndex(file.len().map_err(TransducerError::Io)? as u32), file, }) } @@ -154,11 +154,11 @@ mod unix { return None; } - let index = INDEX_TABLE_SIZE * i as usize; + let index = INDEX_TABLE_SIZE * i.0 as usize; - let input_symbol: SymbolNumber = self.read_u16_at(index as u64); + let input_symbol = SymbolNumber(self.read_u16_at(index as u64)); - if input_symbol == std::u16::MAX { + if input_symbol == SymbolNumber::MAX { None } else { Some(input_symbol) @@ -170,10 +170,10 @@ mod unix { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; - let target: TransitionTableIndex = self.read_u32_at(index as u64); + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; + let target = TransitionTableIndex(self.read_u32_at(index as u64)); - if target == std::u32::MAX { + if target == TransitionTableIndex::MAX { None } else { Some(target) @@ -185,9 +185,9 @@ mod unix { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; let x = self.read_u32_at(index as u64); - let weight: Weight = f32::from_bits(x); + let weight = Weight(f32::from_bits(x)); Some(weight) } diff --git a/src/transducer/thfst/mod.rs b/src/transducer/thfst/mod.rs index 44dd0c4..d1073fc 100644 --- a/src/transducer/thfst/mod.rs +++ b/src/transducer/thfst/mod.rs @@ -150,7 +150,10 @@ where None => false, } } else { - match self.index_table.input_symbol(i + u32::from(sym)) { + match self + .index_table + .input_symbol(i + TransitionTableIndex(sym.0 as u32)) + { Some(res) => sym == res, None => false, } @@ -161,10 +164,10 @@ where fn has_epsilons_or_flags(&self, i: TransitionTableIndex) -> bool { if i >= TARGET_TABLE { match self.transition_table.input_symbol(i - TARGET_TABLE) { - Some(sym) => sym == 0 || self.alphabet.is_flag(sym), + Some(sym) => sym == SymbolNumber::ZERO || self.alphabet.is_flag(sym), None => false, } - } else if let Some(0) = self.index_table.input_symbol(i) { + } else if let Some(SymbolNumber::ZERO) = self.index_table.input_symbol(i) { true } else { false @@ -173,7 +176,7 @@ where #[inline(always)] fn take_epsilons(&self, i: TransitionTableIndex) -> Option { - if let Some(0) = self.transition_table.input_symbol(i) { + if let Some(SymbolNumber::ZERO) = self.transition_table.input_symbol(i) { Some(self.transition_table.symbol_transition(i)) } else { None @@ -183,7 +186,7 @@ where #[inline(always)] fn take_epsilons_and_flags(&self, i: TransitionTableIndex) -> Option { if let Some(sym) = self.transition_table.input_symbol(i) { - if sym != 0 && !self.alphabet.is_flag(sym) { + if sym != SymbolNumber::ZERO && !self.alphabet.is_flag(sym) { None } else { Some(self.transition_table.symbol_transition(i)) @@ -213,8 +216,11 @@ where #[inline(always)] fn next(&self, i: TransitionTableIndex, symbol: SymbolNumber) -> Option { if i >= TARGET_TABLE { - Some(i - TARGET_TABLE + 1) - } else if let Some(v) = self.index_table.target(i + 1 + u32::from(symbol)) { + Some(i - TARGET_TABLE + TransitionTableIndex(1)) + } else if let Some(v) = self + .index_table + .target(i + TransitionTableIndex(symbol.0 as u32 + 1)) + { Some(v - TARGET_TABLE) } else { None diff --git a/src/transducer/thfst/transition_table.rs b/src/transducer/thfst/transition_table.rs index 4903fa1..16ed5f4 100644 --- a/src/transducer/thfst/transition_table.rs +++ b/src/transducer/thfst/transition_table.rs @@ -9,7 +9,7 @@ use memmap2::Mmap; #[derive(Debug)] pub struct MemmapTransitionTable { buf: Mmap, - pub(crate) size: u32, + pub(crate) size: TransitionTableIndex, _file: std::marker::PhantomData, } @@ -32,7 +32,7 @@ impl MemmapTransitionTable { file.partial_memory_map(chunk * len, len as usize) .map_err(TransducerError::Memmap)? }; - let size = (buf.len() / TRANS_TABLE_SIZE) as u32; + let size = TransitionTableIndex((buf.len() / TRANS_TABLE_SIZE) as u32); Ok(MemmapTransitionTable { buf, size, @@ -43,7 +43,7 @@ impl MemmapTransitionTable { #[inline] fn read_symbol_from_cursor(&self, index: usize) -> Option { let x = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if x == std::u16::MAX { + if x == SymbolNumber::MAX { None } else { Some(x) @@ -62,7 +62,7 @@ impl TransitionTable for MemmapTransitionTable { let size = (buf.len() / TRANS_TABLE_SIZE) as u32; Ok(MemmapTransitionTable { buf, - size, + size: TransitionTableIndex(size), _file: std::marker::PhantomData::, }) } @@ -72,7 +72,7 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = TRANS_TABLE_SIZE as usize * i as usize; + let index = TRANS_TABLE_SIZE as usize * i.0 as usize; self.read_symbol_from_cursor(index) } @@ -81,7 +81,7 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = ((TRANS_TABLE_SIZE * i as usize) + mem::size_of::()) as usize; + let index = ((TRANS_TABLE_SIZE * i.0 as usize) + mem::size_of::()) as usize; self.read_symbol_from_cursor(index) } @@ -90,11 +90,11 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + (2 * mem::size_of::()); + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()); let x: TransitionTableIndex = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if x == std::u32::MAX { + if x == TransitionTableIndex::MAX { None } else { Some(x) @@ -106,7 +106,7 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()) + mem::size_of::(); @@ -127,7 +127,7 @@ mod unix { pub struct FileTransitionTable { file: F, - size: u32, + size: TransitionTableIndex, } impl FileTransitionTable { @@ -158,7 +158,7 @@ mod unix { { let file = fs.open_file(path).map_err(TransducerError::Io)?; Ok(FileTransitionTable { - size: file.len().map_err(TransducerError::Io)? as u32, + size: TransitionTableIndex(file.len().map_err(TransducerError::Io)? as u32), file, }) } @@ -169,9 +169,9 @@ mod unix { return None; } - let index = TRANS_TABLE_SIZE as usize * i as usize; - let x = self.read_u16_at(index as u64); - if x == std::u16::MAX { + let index = TRANS_TABLE_SIZE as usize * i.0 as usize; + let x = SymbolNumber(self.read_u16_at(index as u64)); + if x == SymbolNumber::MAX { None } else { Some(x) @@ -184,9 +184,10 @@ mod unix { return None; } - let index = ((TRANS_TABLE_SIZE * i as usize) + mem::size_of::()) as usize; - let x = self.read_u16_at(index as u64); - if x == std::u16::MAX { + let index = + ((TRANS_TABLE_SIZE * i.0 as usize) + mem::size_of::()) as usize; + let x = SymbolNumber(self.read_u16_at(index as u64)); + if x == SymbolNumber::MAX { None } else { Some(x) @@ -199,10 +200,10 @@ mod unix { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + (2 * mem::size_of::()); + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()); - let x = self.read_u32_at(index as u64); - if x == std::u32::MAX { + let x = TransitionTableIndex(self.read_u32_at(index as u64)); + if x == TransitionTableIndex::MAX { None } else { Some(x) @@ -215,11 +216,11 @@ mod unix { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()) + mem::size_of::(); let x = self.read_u32_at(index as u64); - let x = f32::from_bits(x); + let x = Weight(f32::from_bits(x)); Some(x) } } diff --git a/src/transducer/tree_node.rs b/src/transducer/tree_node.rs index 61e58cb..c935006 100644 --- a/src/transducer/tree_node.rs +++ b/src/transducer/tree_node.rs @@ -4,16 +4,16 @@ use std::hash::{Hash, Hasher}; use super::symbol_transition::SymbolTransition; use crate::types::{ - FlagDiacriticOperation, FlagDiacriticOperator, FlagDiacriticState, SymbolNumber, - TransitionTableIndex, Weight, + FlagDiacriticOperation, FlagDiacriticOperator, FlagDiacriticState, InputIndex, SymbolNumber, + TransitionTableIndex, ValueNumber, Weight, }; #[derive(Debug, Clone)] pub(crate) struct TreeNode { pub(crate) lexicon_state: TransitionTableIndex, pub(crate) mutator_state: TransitionTableIndex, - pub(crate) input_state: u32, - pub(crate) weight: f32, + pub(crate) input_state: InputIndex, + pub(crate) weight: Weight, pub(crate) flag_state: FlagDiacriticState, pub(crate) string: Vec, } @@ -52,9 +52,9 @@ impl std::cmp::Eq for TreeNode {} impl Hash for TreeNode { fn hash(&self, state: &mut H) { - state.write_u32(self.input_state); - state.write_u32(self.mutator_state); - state.write_u32(self.lexicon_state); + self.input_state.hash(state); + self.mutator_state.hash(state); + self.lexicon_state.hash(state); } } @@ -62,11 +62,11 @@ impl lifeguard::Recycleable for TreeNode { fn new() -> Self { TreeNode { string: Vec::with_capacity(1), - input_state: 0, - mutator_state: 0, - lexicon_state: 0, + input_state: InputIndex(0), + mutator_state: TransitionTableIndex(0), + lexicon_state: TransitionTableIndex(0), flag_state: vec![], - weight: 0.0, + weight: Weight(0.0), } } @@ -105,11 +105,11 @@ impl TreeNode { ) -> Recycled<'a, TreeNode> { pool.attach(TreeNode { string: vec![], - input_state: 0, - mutator_state: 0, - lexicon_state: 0, + input_state: InputIndex(0), + mutator_state: TransitionTableIndex(0), + lexicon_state: TransitionTableIndex(0), flag_state: start_state, - weight: 0.0, + weight: Weight(0.0), }) } @@ -132,7 +132,7 @@ impl TreeNode { } if let Some(value) = transition.symbol() { - if value != 0 { + if value.0 != 0 { node.string.push(value); } } @@ -182,7 +182,7 @@ impl TreeNode { &self, pool: &'a Pool, output_symbol: SymbolNumber, - next_input: Option, + next_input: Option, next_mutator: TransitionTableIndex, next_lexicon: TransitionTableIndex, weight: Weight, @@ -194,7 +194,7 @@ impl TreeNode { node.string.extend(&self.string); } - if output_symbol != 0 { + if output_symbol.0 != 0 { node.string.push(output_symbol); } @@ -223,11 +223,11 @@ impl TreeNode { &self, pool: &'a Pool, feature: SymbolNumber, - value: i16, + value: ValueNumber, transition: &SymbolTransition, ) -> Recycled<'a, TreeNode> { let mut node = self.apply_transition(pool, transition); - node.flag_state[feature as usize] = value; + node.flag_state[feature.0 as usize] = value; node } @@ -270,13 +270,13 @@ impl TreeNode { Some(self.update_flag(pool, op.feature, op.value, transition)) } FlagDiacriticOperator::NegativeSet => { - Some(self.update_flag(pool, op.feature, -op.value, transition)) + Some(self.update_flag(pool, op.feature, op.value.invert(), transition)) } FlagDiacriticOperator::Require => { - let res = if op.value == 0 { - self.flag_state[op.feature as usize] != 0 + let res = if op.value.0 == 0 { + self.flag_state[op.feature.0 as usize] != ValueNumber(0) } else { - self.flag_state[op.feature as usize] == op.value + self.flag_state[op.feature.0 as usize] == op.value }; if res { @@ -286,10 +286,10 @@ impl TreeNode { } } FlagDiacriticOperator::Disallow => { - let res = if op.value == 0 { - self.flag_state[op.feature as usize] == 0 + let res = if op.value.0 == 0 { + self.flag_state[op.feature.0 as usize] == ValueNumber(0) } else { - self.flag_state[op.feature as usize] != op.value + self.flag_state[op.feature.0 as usize] != op.value }; if res { @@ -298,13 +298,15 @@ impl TreeNode { None } } - FlagDiacriticOperator::Clear => Some(self.update_flag(pool, op.feature, 0, transition)), + FlagDiacriticOperator::Clear => { + Some(self.update_flag(pool, op.feature, ValueNumber(0), transition)) + } FlagDiacriticOperator::Unification => { // if the feature is unset OR the feature is to this value already OR // the feature is negatively set to something else than this value - let f = self.flag_state[op.feature as usize]; + let f = self.flag_state[op.feature.0 as usize]; - if f == 0 || f == op.value || (f < 0 && -f != op.value) { + if f.0 == 0 || f == op.value || (f.0 < 0 && f.invert() != op.value) { Some(self.update_flag(pool, op.feature, op.value, transition)) } else { None diff --git a/src/types.rs b/src/types.rs index 437aeb0..7fbf593 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,3 +1,8 @@ +use std::{ + fmt::Display, + ops::{Add, Div, Mul, Sub}, +}; + use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -46,9 +51,154 @@ pub struct FlagDiacriticOperation { pub value: ValueNumber, } -pub type SymbolNumber = u16; -pub type ValueNumber = i16; -pub type TransitionTableIndex = u32; -pub type Weight = f32; -pub type FlagDiacriticState = Vec; +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct SymbolNumber(pub(crate) u16); + +impl SymbolNumber { + pub(crate) const ZERO: Self = SymbolNumber(0); + pub(crate) const MAX: Self = SymbolNumber(u16::MAX); + + #[inline(always)] + pub(crate) fn incr(&self) -> Self { + Self(self.0 + 1) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +#[serde(transparent)] +pub struct ValueNumber(pub(crate) i16); + +impl ValueNumber { + pub const ZERO: Self = ValueNumber(0); + + #[inline(always)] + pub(crate) fn invert(&self) -> Self { + ValueNumber(-self.0) + } + + #[inline(always)] + pub(crate) fn incr(&self) -> Self { + ValueNumber(self.0 + 1) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct InputIndex(pub(crate) u32); + +impl InputIndex { + #[inline(always)] + pub(crate) fn incr(&self, val: u32) -> Self { + Self(self.0 + val) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct TransitionTableIndex(pub(crate) u32); + +impl Display for TransitionTableIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Add for TransitionTableIndex { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 + rhs.0) + } +} + +impl Sub for TransitionTableIndex { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 - rhs.0) + } +} + +impl Mul for TransitionTableIndex { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 * rhs.0) + } +} + +impl Div for TransitionTableIndex { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 / rhs.0) + } +} + +impl TransitionTableIndex { + pub(crate) const MAX: Self = TransitionTableIndex(u32::MAX); + pub(crate) const ZERO: Self = TransitionTableIndex(0); + pub(crate) const ONE: Self = TransitionTableIndex(1); + + #[inline(always)] + pub(crate) fn incr(&self) -> Self { + Self(self.0 + 1) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, PartialOrd)] +#[repr(transparent)] +#[serde(transparent)] +pub struct Weight(pub f32); + +impl Weight { + pub const ZERO: Self = Weight(0.0); + pub const MAX: Self = Weight(f32::MAX); + pub const INFINITE: Self = Weight(f32::INFINITY); +} + +impl Display for Weight { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Add for Weight { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Weight(self.0 + rhs.0) + } +} + +impl Sub for Weight { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Weight(self.0 - rhs.0) + } +} + +impl Mul for Weight { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + Weight(self.0 * rhs.0) + } +} + +impl Div for Weight { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + Weight(self.0 / rhs.0) + } +} + +pub type FlagDiacriticState = Vec; pub type OperationsMap = hashbrown::HashMap; From 85711d7408d3018b5b7cb137c94bce558a3f5fa9 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 9 Oct 2025 19:16:46 +0200 Subject: [PATCH 21/21] Make inners pub --- src/types.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/types.rs b/src/types.rs index 7fbf593..a51a890 100644 --- a/src/types.rs +++ b/src/types.rs @@ -54,7 +54,7 @@ pub struct FlagDiacriticOperation { #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] #[serde(transparent)] -pub struct SymbolNumber(pub(crate) u16); +pub struct SymbolNumber(pub u16); impl SymbolNumber { pub(crate) const ZERO: Self = SymbolNumber(0); @@ -69,7 +69,7 @@ impl SymbolNumber { #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] #[serde(transparent)] -pub struct ValueNumber(pub(crate) i16); +pub struct ValueNumber(pub i16); impl ValueNumber { pub const ZERO: Self = ValueNumber(0); @@ -88,7 +88,7 @@ impl ValueNumber { #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] #[serde(transparent)] -pub struct InputIndex(pub(crate) u32); +pub struct InputIndex(pub u32); impl InputIndex { #[inline(always)] @@ -100,7 +100,7 @@ impl InputIndex { #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] #[serde(transparent)] -pub struct TransitionTableIndex(pub(crate) u32); +pub struct TransitionTableIndex(pub u32); impl Display for TransitionTableIndex { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {