diff --git a/bindings/java/bin/bindings.zip b/bindings/java/bin/bindings.zip index 22cc94ea..492b68ac 100644 Binary files a/bindings/java/bin/bindings.zip and b/bindings/java/bin/bindings.zip differ diff --git a/bindings/java/src/lib.rs b/bindings/java/src/lib.rs index fa225946..89013342 100644 --- a/bindings/java/src/lib.rs +++ b/bindings/java/src/lib.rs @@ -145,13 +145,7 @@ pub unsafe extern "system" fn Java_com_github_null8626_decancer_CuredString_find &[ JValueGen::Long(result.start as _), JValueGen::Long(result.end as _), - JValueGen::Object( - &jni_unwrap!( - env, - env.new_string(unsafe { (*inner).get_unchecked(result) }) - ) - .into() - ), + JValueGen::Object(&jni_unwrap!(env, env.new_string(unsafe { &(*inner)[result] })).into()), ] ) ); @@ -185,13 +179,7 @@ pub unsafe extern "system" fn Java_com_github_null8626_decancer_CuredString_find &[ JValueGen::Long(result.start as _), JValueGen::Long(result.end as _), - JValueGen::Object( - &jni_unwrap!( - env, - env.new_string(unsafe { (*inner).get_unchecked(result) }) - ) - .into() - ), + JValueGen::Object(&jni_unwrap!(env, env.new_string(unsafe { &(*inner)[result] })).into()), ] ) ); diff --git a/bindings/native/src/lib.rs b/bindings/native/src/lib.rs index 9e17652f..d458f912 100644 --- a/bindings/native/src/lib.rs +++ b/bindings/native/src/lib.rs @@ -378,7 +378,7 @@ pub unsafe extern "C" fn decancer_equals_wide( other_size: usize, ) -> bool { utf16::get(other_str, other_size) - .map(|vec| unsafe { (*cured) == str::from_utf8_unchecked(&vec) }) + .map(|vec| unsafe { (*cured) == str::from_utf8(&vec).unwrap() }) .unwrap_or_default() } @@ -403,7 +403,7 @@ macro_rules! comparison_fn { other_size: usize, ) -> bool { utf16::get(other_str, other_size) - .map(|vec| unsafe { (*cured).$name(str::from_utf8_unchecked(&vec)) }) + .map(|vec| unsafe { (*cured).$name(str::from_utf8(&vec).unwrap()) }) .unwrap_or_default() } } diff --git a/bindings/native/src/utf16.rs b/bindings/native/src/utf16.rs index 3d218722..988f5415 100644 --- a/bindings/native/src/utf16.rs +++ b/bindings/native/src/utf16.rs @@ -64,7 +64,7 @@ pub(crate) unsafe fn get_array( output.push(unsafe { let s = input_ptr.offset(i as _); - String::from_utf8_unchecked(get((*s).string, (*s).size)?) + String::from_utf8(get((*s).string, (*s).size)?).unwrap() }); } diff --git a/bindings/native/src/utf8.rs b/bindings/native/src/utf8.rs index 51fbdb9b..4035dbfd 100644 --- a/bindings/native/src/utf8.rs +++ b/bindings/native/src/utf8.rs @@ -21,11 +21,7 @@ pub(crate) fn get(input_ptr: *mut u8, mut input_size: usize) -> Option<&'static input_size = input_ptr.size; } - unsafe { - Some(str::from_utf8_unchecked(slice::from_raw_parts( - input_ptr, input_size, - ))) - } + str::from_utf8(unsafe { slice::from_raw_parts(input_ptr, input_size) }).ok() } pub(crate) unsafe fn get_array( diff --git a/bindings/node/src/lib.rs b/bindings/node/src/lib.rs index fc2ff89e..957f0a4f 100644 --- a/bindings/node/src/lib.rs +++ b/bindings/node/src/lib.rs @@ -7,7 +7,7 @@ use napi::{ bindgen_prelude::{Error, FromNapiValue}, Env, JsNumber, JsString, JsUnknown, Result, Status, ValueType, }; -use std::{mem::transmute, ops::Range}; +use std::ops::Range; macro_rules! options { ( @@ -116,7 +116,7 @@ impl CuredString { fn new_match(&self, mat: Range) -> Match { Match { range: mat.clone(), - portion: String::from(unsafe { self.0.get_unchecked(mat) }), + portion: String::from(&self.0[mat]), } } @@ -218,7 +218,7 @@ fn cure(input: String, maybe_options: JsUnknown) -> Result { as FromNapiValue>::from_unknown(maybe_options).map(options) }?; - match decancer::cure(&input, unsafe { transmute(options) }) { + match decancer::cure(&input, options.into()) { Ok(output) => Ok(CuredString(output)), Err(err) => Err(Error::new(Status::InvalidArg, err)), } diff --git a/bindings/wasm/bin/decancer.wasm b/bindings/wasm/bin/decancer.wasm index 3c8bb2af..51569208 100644 Binary files a/bindings/wasm/bin/decancer.wasm and b/bindings/wasm/bin/decancer.wasm differ diff --git a/bindings/wasm/src/lib.rs b/bindings/wasm/src/lib.rs index 2ce5cc13..1a1bfb13 100644 --- a/bindings/wasm/src/lib.rs +++ b/bindings/wasm/src/lib.rs @@ -1,6 +1,6 @@ -#![allow(non_snake_case, dead_code)] +#![allow(non_snake_case)] -use std::{convert::AsRef, mem::transmute, ops::Range}; +use std::{convert::AsRef, ops::Range}; use wasm_bindgen::prelude::*; #[wasm_bindgen] @@ -27,7 +27,7 @@ impl CuredString { Match { start: mat.start, end: mat.end, - portion: String::from(unsafe { self.0.get_unchecked(mat) }), + portion: String::from(&self.0[mat]), } } @@ -83,7 +83,7 @@ impl CuredString { #[wasm_bindgen] pub fn cure(input: &str, options: u32) -> Result { - match decancer::cure(input, unsafe { transmute(options) }) { + match decancer::cure(input, options.into()) { Ok(output) => Ok(CuredString(output)), Err(err) => Err(JsError::new(>::as_ref(&err))), } diff --git a/core/build.rs b/core/build.rs index 18bb76f2..b3676500 100644 --- a/core/build.rs +++ b/core/build.rs @@ -1,3 +1,4 @@ fn main() { + println!("cargo:rerun-if-changed=bin/bidi.bin"); println!("cargo:rerun-if-changed=bin/codepoints.bin"); } diff --git a/core/src/bidi/paragraph.rs b/core/src/bidi/paragraph.rs index 021859ef..0ba68931 100644 --- a/core/src/bidi/paragraph.rs +++ b/core/src/bidi/paragraph.rs @@ -575,24 +575,27 @@ impl Paragraph { last.level.new_explicit_next_ltr() }; - if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { - // SAFETY: new_level was already proven to be Ok - let new_level = unsafe { new_level.unwrap_unchecked() }; - - stack.push(Status { - level: new_level, - status: original_classes[idx].override_status(), - }); + match (new_level, overflow_isolate_count, overflow_embedding_count) { + (Ok(new_level), 0, 0) => { + stack.push(Status { + level: new_level, + status: original_classes[idx].override_status(), + }); + + if is_isolate { + valid_isolate_count += 1; + } else { + levels[idx] = new_level; + } + } - if is_isolate { - valid_isolate_count += 1; - } else { - levels[idx] = new_level; + _ => { + if is_isolate { + overflow_isolate_count += 1; + } else if overflow_isolate_count == 0 { + overflow_embedding_count += 1; + } } - } else if is_isolate { - overflow_isolate_count += 1; - } else if overflow_isolate_count == 0 { - overflow_embedding_count += 1; } if !is_isolate { @@ -700,8 +703,7 @@ impl Paragraph { .unwrap_or(start_class); let mut sequence = if start_class == Class::PDI && stack.len() > 1 { - // SAFETY: stack is already checked to be not empty - unsafe { stack.pop().unwrap_unchecked() } + stack.pop().unwrap() } else { Vec::with_capacity(1) }; diff --git a/core/src/leetspeak.rs b/core/src/leetspeak.rs index b1615a92..519b37b9 100644 --- a/core/src/leetspeak.rs +++ b/core/src/leetspeak.rs @@ -52,7 +52,7 @@ pub(crate) fn find(haystack: &str, character: u32) -> Option { _ => return None, }; - let regex = unsafe { REGEXES.as_slice().get_unchecked(idx as usize).as_ref() }; + let regex = ®EXES[idx as usize]; - regex?.find(haystack).map(|mat| mat.len()) + regex.as_ref()?.find(haystack).map(|mat| mat.len()) } diff --git a/core/src/lib.rs b/core/src/lib.rs index b31ffcc3..3b87e95f 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -89,14 +89,9 @@ const fn is_special_rtl(code: u32) -> bool { } fn cure_char_inner(code: u32, options: Options) -> Translation { - // SAFETY: even if there is no lowercase mapping for some codepoints, it would just return itself. - // therefore, the first iteration and/or codepoint always exists. - let code_lowercased = unsafe { - char::from_u32_unchecked(code) - .to_lowercase() - .next() - .unwrap_unchecked() as _ - }; + let code_lowercased = char::from_u32(code) + .and_then(|character| character.to_lowercase().next()) + .unwrap() as _; let is_case_sensitive = code != code_lowercased; @@ -281,8 +276,7 @@ fn first_cure_pass(input: &str) -> (String, Vec, Vec) { _ => {} } - // SAFETY: the only modification to this codepoint is in the if-statement above. - refined_input.push(unsafe { char::from_u32_unchecked(codepoint) }); + refined_input.push(char::from_u32(codepoint).unwrap()); idx += character_len; } @@ -438,11 +432,8 @@ macro_rules! cure { #[macro_export] macro_rules! format { ($string:expr) => { - // SAFETY: having disable_bidi enabled removes all possibilities of an Err - unsafe { - $crate::cure($string, $crate::Options::formatter()) - .unwrap_unchecked() - .into() - } + $crate::cure($string, $crate::Options::formatter()) + .unwrap() + .into() }; } diff --git a/core/src/options.rs b/core/src/options.rs index 2eb4edc7..0ca8d0b0 100644 --- a/core/src/options.rs +++ b/core/src/options.rs @@ -1,8 +1,6 @@ use crate::{codepoints::Codepoint, Translation}; use paste::paste; use std::cmp::Ordering; -#[cfg(feature = "options")] -use std::mem::transmute; /// A configuration struct where you can customize decancer's behavior. /// @@ -126,11 +124,10 @@ impl Options { } #[cfg(feature = "options")] - #[allow(clippy::transmute_int_to_bool)] pub(crate) const fn refuse_cure(self, attributes: u8) -> bool { let locale = attributes >> 1; - (unsafe { transmute(attributes & 1) } && self.is(2)) || (locale > 2 && self.is(locale)) + ((attributes & 1) != 0 && self.is(2)) || (locale > 2 && self.is(locale)) } pub(crate) const fn translate(self, code: u32, offset: i32, mut end: i32) -> Option { @@ -160,3 +157,12 @@ impl Options { None } } + +#[doc(hidden)] +#[cfg(feature = "options")] +impl From for Options { + #[inline(always)] + fn from(value: u32) -> Self { + Self(value) + } +} diff --git a/core/src/similar.rs b/core/src/similar.rs index 99e2087d..fee10352 100644 --- a/core/src/similar.rs +++ b/core/src/similar.rs @@ -10,10 +10,8 @@ pub(crate) const SIMILAR_START: u16 = read_u16_le(unsafe { CODEPOINTS.offset(2) pub(crate) const SIMILAR_END: u16 = read_u16_le(unsafe { CODEPOINTS.offset(4) }); pub(crate) fn is(self_char: char, other_char: char) -> bool { - // SAFETY: even if there is no lowercase mapping for some codepoints, it would just return itself. - // therefore, the first iteration and/or codepoint always exists. - let self_char = unsafe { self_char.to_lowercase().next().unwrap_unchecked() as u32 }; - let other_char = unsafe { other_char.to_lowercase().next().unwrap_unchecked() as u32 }; + let self_char = self_char.to_lowercase().next().unwrap() as u32; + let other_char = other_char.to_lowercase().next().unwrap() as u32; if self_char == other_char { return true; @@ -93,8 +91,7 @@ impl<'a> Iterator for CachedPeek<'a> { return None; } - // SAFETY: the current index always points to an existing element - let current = unsafe { *self.cache.get_unchecked(self.index) }; + let current = self.cache[self.index]; let next_element = self.next_value(); if next_element.is_none() { @@ -124,26 +121,21 @@ impl<'a, 'b> Matcher<'a, 'b> { self_str = ""; } - // SAFETY: self_str = "" would immediately cancel out the first iteration. Self { self_iterator: self_str.chars(), #[cfg(feature = "leetspeak")] self_str, self_index: 0, - other_iterator: CachedPeek::new(other_chars, unsafe { other_first.unwrap_unchecked() }), + other_iterator: CachedPeek::new(other_chars, other_first.unwrap()), } } #[cfg(feature = "leetspeak")] fn matches_leetspeak(&mut self, other_char: char) -> Option { - // SAFETY: already guaranteed to be within the string's bounds. - let haystack = unsafe { self.self_str.get_unchecked(self.self_index..) }; + let haystack = &self.self_str[self.self_index..]; let matched_len = leetspeak::find(haystack, other_char as _)?; - // SAFETY: this will never go out of bounds as well - // the furthest it would go would be an empty string. - self.self_iterator = - unsafe { self.self_str.get_unchecked(self.self_index + matched_len..) }.chars(); + self.self_iterator = self.self_str[self.self_index + matched_len..].chars(); Some(matched_len) } @@ -241,8 +233,7 @@ impl<'a, 'b> Iterator for Matcher<'a, 'b> { self.other_iterator.restart(); current_separator = None; - // SAFETY: this state of the program wouldn't be accessible if the first iteration returns a None - current_other = unsafe { self.other_iterator.next().unwrap_unchecked() }; + current_other = self.other_iterator.next().unwrap(); let (skipped, matched_skip) = self.skip_until(current_other.0)?; diff --git a/core/src/string.rs b/core/src/string.rs index 92a93426..ebc01eee 100644 --- a/core/src/string.rs +++ b/core/src/string.rs @@ -6,7 +6,6 @@ use crate::{ use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use std::{ fmt::{self, Debug, Display, Formatter}, - mem::transmute, ops::{Deref, Range}, }; @@ -18,12 +17,6 @@ use std::{ pub struct CuredString(pub(crate) String); impl CuredString { - #[deprecated(since = "3.1.2", note = "use .into() instead")] - #[inline(always)] - pub fn into_str(self) -> String { - self.into() - } - /// Iterates throughout this string and yields every similar-looking match. /// /// If you plan on using this method with an array of strings, use [`find_multiple`][CuredString::find_multiple]. @@ -78,9 +71,7 @@ impl CuredString { for mat in matches { // SAFETY: mat is always within the mat of self - let chars = unsafe { original.get_unchecked(mat.clone()) } - .chars() - .count(); + let chars = original[mat.clone()].chars().count(); let mut with_str = String::with_capacity(chars); for _ in 0..chars { @@ -224,8 +215,7 @@ impl CuredString { impl From for String { #[inline(always)] fn from(val: CuredString) -> Self { - // SAFETY: see definition of CuredString - unsafe { transmute(val) } + val.0 } } diff --git a/core/src/translation.rs b/core/src/translation.rs index fed434fb..98a82d94 100644 --- a/core/src/translation.rs +++ b/core/src/translation.rs @@ -47,7 +47,7 @@ impl Translation { #[cfg(feature = "options")] pub(crate) fn into_uppercase(self) -> Self { match self { - Self::Character(c) => Self::Character(unsafe { c.to_uppercase().next().unwrap_unchecked() }), + Self::Character(c) => Self::Character(c.to_uppercase().next().unwrap()), Self::String(s) => Self::String(Cow::Owned(s.as_ref().to_uppercase())), Self::None => Self::None, } diff --git a/core/src/util.rs b/core/src/util.rs index 12ce599c..b9444191 100644 --- a/core/src/util.rs +++ b/core/src/util.rs @@ -44,8 +44,8 @@ where let mut j = 0; for i in 1..ranges.len() { - let current = unsafe { ranges.get_unchecked(i).clone() }; - let previous = unsafe { ranges.get_unchecked_mut(j) }; + let current = ranges[i].clone(); + let previous = &mut ranges[j]; if current.start >= previous.start && current.start <= previous.end { previous.end = max(current.end, previous.end);