From 291af1a7056be934ed2cbe875f3e252f63ba2baf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Mond=C3=A9jar=20Rubio?= Date: Mon, 26 Aug 2024 01:42:04 +0200 Subject: [PATCH] Implement dummy sorting for `LanguageIdentifier`'s variants --- unic-langid-impl/src/lib.rs | 11 +++------- unic-langid-impl/src/parser/mod.rs | 4 +--- unic-langid-impl/src/subtags/variant.rs | 28 +++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/unic-langid-impl/src/lib.rs b/unic-langid-impl/src/lib.rs index ece89a9b..d69c195a 100644 --- a/unic-langid-impl/src/lib.rs +++ b/unic-langid-impl/src/lib.rs @@ -130,9 +130,7 @@ impl LanguageIdentifier { variants: &[subtags::Variant], ) -> Self { let variants = if !variants.is_empty() { - let mut v = variants.to_vec(); - v.sort_unstable(); - v.dedup(); + let v = subtags::Variant::sort_and_deduplicate(variants); Some(v.into_boxed_slice()) } else { None @@ -298,13 +296,10 @@ impl LanguageIdentifier { /// assert_eq!(li.to_string(), "ca-ES-valencia"); /// ``` pub fn set_variants(&mut self, variants: &[subtags::Variant]) { - let mut v = variants.to_vec(); - - if v.is_empty() { + if variants.is_empty() { self.variants = None; } else { - v.sort_unstable(); - v.dedup(); + let v = subtags::Variant::sort_and_deduplicate(variants); self.variants = Some(v.into_boxed_slice()); } } diff --git a/unic-langid-impl/src/parser/mod.rs b/unic-langid-impl/src/parser/mod.rs index d750ebba..c22a0ff0 100644 --- a/unic-langid-impl/src/parser/mod.rs +++ b/unic-langid-impl/src/parser/mod.rs @@ -49,7 +49,7 @@ pub fn parse_language_identifier_from_iter<'a>( } else { // Variants if let Ok(v) = subtags::Variant::from_bytes(subtag) { - variants.push(v); + subtags::Variant::push_sorted_and_deduplicated(v, &mut variants); } else { break; } @@ -64,8 +64,6 @@ pub fn parse_language_identifier_from_iter<'a>( let variants = if variants.is_empty() { None } else { - variants.sort_unstable(); - variants.dedup(); Some(variants.into_boxed_slice()) }; 
diff --git a/unic-langid-impl/src/subtags/variant.rs b/unic-langid-impl/src/subtags/variant.rs
index 5df930d6..ddbd85e0 100644
--- a/unic-langid-impl/src/subtags/variant.rs
+++ b/unic-langid-impl/src/subtags/variant.rs
@@ -1,5 +1,6 @@
 use crate::parser::errors::ParserError;
 use std::str::FromStr;
+use core::cmp::Ordering;
 use tinystr::TinyStr8;
 
 #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
@@ -37,6 +38,33 @@ impl Variant {
     pub const unsafe fn from_raw_unchecked(v: u64) -> Self {
         Self(TinyStr8::from_bytes_unchecked(v.to_le_bytes()))
     }
+
+    /// Inserts `item` into the already sorted `vec` at its ordered position, skipping duplicates.
+    #[inline]
+    pub(crate) fn push_sorted_and_deduplicated(
+        item: Variant,
+        vec: &mut Vec<Variant>,
+    ) {
+        let mut i = 0;
+        while i < vec.len() {
+            match vec[i].cmp(&item) {
+                Ordering::Less => i += 1,
+                Ordering::Equal => return,
+                Ordering::Greater => break,
+            }
+        }
+        vec.insert(i, item);
+    }
+
+    /// Returns the variants in `vec` sorted in ascending order with duplicates removed.
+    #[inline]
+    pub(crate) fn sort_and_deduplicate(vec: &[Variant]) -> Vec<Variant> {
+        let mut new_vec = Vec::with_capacity(vec.len());
+        for item in vec {
+            Self::push_sorted_and_deduplicated(*item, &mut new_vec);
+        }
+        new_vec
+    }
 }
 
 impl From<Variant> for u64 {