Skip to content

Commit

Permalink
refactor: remove some unsafe (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
null8626 committed Oct 28, 2024
1 parent dcf7ad3 commit f087966
Show file tree
Hide file tree
Showing 17 changed files with 65 additions and 100 deletions.
Binary file modified bindings/java/bin/bindings.zip
Binary file not shown.
16 changes: 2 additions & 14 deletions bindings/java/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,7 @@ pub unsafe extern "system" fn Java_com_github_null8626_decancer_CuredString_find
&[
JValueGen::Long(result.start as _),
JValueGen::Long(result.end as _),
JValueGen::Object(
&jni_unwrap!(
env,
env.new_string(unsafe { (*inner).get_unchecked(result) })
)
.into()
),
JValueGen::Object(&jni_unwrap!(env, env.new_string(unsafe { &(*inner)[result] })).into()),
]
)
);
Expand Down Expand Up @@ -185,13 +179,7 @@ pub unsafe extern "system" fn Java_com_github_null8626_decancer_CuredString_find
&[
JValueGen::Long(result.start as _),
JValueGen::Long(result.end as _),
JValueGen::Object(
&jni_unwrap!(
env,
env.new_string(unsafe { (*inner).get_unchecked(result) })
)
.into()
),
JValueGen::Object(&jni_unwrap!(env, env.new_string(unsafe { &(*inner)[result] })).into()),
]
)
);
Expand Down
4 changes: 2 additions & 2 deletions bindings/native/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ pub unsafe extern "C" fn decancer_equals_wide(
other_size: usize,
) -> bool {
utf16::get(other_str, other_size)
.map(|vec| unsafe { (*cured) == str::from_utf8_unchecked(&vec) })
.map(|vec| unsafe { (*cured) == str::from_utf8(&vec).unwrap() })
.unwrap_or_default()
}

Expand All @@ -403,7 +403,7 @@ macro_rules! comparison_fn {
other_size: usize,
) -> bool {
utf16::get(other_str, other_size)
.map(|vec| unsafe { (*cured).$name(str::from_utf8_unchecked(&vec)) })
.map(|vec| unsafe { (*cured).$name(str::from_utf8(&vec).unwrap()) })
.unwrap_or_default()
}
}
Expand Down
2 changes: 1 addition & 1 deletion bindings/native/src/utf16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ pub(crate) unsafe fn get_array(
output.push(unsafe {
let s = input_ptr.offset(i as _);

String::from_utf8_unchecked(get((*s).string, (*s).size)?)
String::from_utf8(get((*s).string, (*s).size)?).unwrap()
});
}

Expand Down
6 changes: 1 addition & 5 deletions bindings/native/src/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@ pub(crate) fn get(input_ptr: *mut u8, mut input_size: usize) -> Option<&'static
input_size = input_ptr.size;
}

unsafe {
Some(str::from_utf8_unchecked(slice::from_raw_parts(
input_ptr, input_size,
)))
}
str::from_utf8(unsafe { slice::from_raw_parts(input_ptr, input_size) }).ok()
}

pub(crate) unsafe fn get_array(
Expand Down
6 changes: 3 additions & 3 deletions bindings/node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use napi::{
bindgen_prelude::{Error, FromNapiValue},
Env, JsNumber, JsString, JsUnknown, Result, Status, ValueType,
};
use std::{mem::transmute, ops::Range};
use std::ops::Range;

macro_rules! options {
(
Expand Down Expand Up @@ -116,7 +116,7 @@ impl CuredString {
fn new_match(&self, mat: Range<usize>) -> Match {
Match {
range: mat.clone(),
portion: String::from(unsafe { self.0.get_unchecked(mat) }),
portion: String::from(&self.0[mat]),
}
}

Expand Down Expand Up @@ -218,7 +218,7 @@ fn cure(input: String, maybe_options: JsUnknown) -> Result<CuredString> {
<Option<Options> as FromNapiValue>::from_unknown(maybe_options).map(options)
}?;

match decancer::cure(&input, unsafe { transmute(options) }) {
match decancer::cure(&input, options.into()) {
Ok(output) => Ok(CuredString(output)),
Err(err) => Err(Error::new(Status::InvalidArg, err)),
}
Expand Down
Binary file modified bindings/wasm/bin/decancer.wasm
Binary file not shown.
8 changes: 4 additions & 4 deletions bindings/wasm/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(non_snake_case, dead_code)]
#![allow(non_snake_case)]

use std::{convert::AsRef, mem::transmute, ops::Range};
use std::{convert::AsRef, ops::Range};
use wasm_bindgen::prelude::*;

#[wasm_bindgen]
Expand All @@ -27,7 +27,7 @@ impl CuredString {
Match {
start: mat.start,
end: mat.end,
portion: String::from(unsafe { self.0.get_unchecked(mat) }),
portion: String::from(&self.0[mat]),
}
}

Expand Down Expand Up @@ -83,7 +83,7 @@ impl CuredString {

#[wasm_bindgen]
pub fn cure(input: &str, options: u32) -> Result<CuredString, JsError> {
match decancer::cure(input, unsafe { transmute(options) }) {
match decancer::cure(input, options.into()) {
Ok(output) => Ok(CuredString(output)),
Err(err) => Err(JsError::new(<decancer::Error as AsRef<str>>::as_ref(&err))),
}
Expand Down
1 change: 1 addition & 0 deletions core/build.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
fn main() {
println!("cargo:rerun-if-changed=bin/bidi.bin");
println!("cargo:rerun-if-changed=bin/codepoints.bin");
}
38 changes: 20 additions & 18 deletions core/src/bidi/paragraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,24 +575,27 @@ impl Paragraph {
last.level.new_explicit_next_ltr()
};

if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 {
// SAFETY: new_level was already proven to be Ok
let new_level = unsafe { new_level.unwrap_unchecked() };

stack.push(Status {
level: new_level,
status: original_classes[idx].override_status(),
});
match (new_level, overflow_isolate_count, overflow_embedding_count) {
(Ok(new_level), 0, 0) => {
stack.push(Status {
level: new_level,
status: original_classes[idx].override_status(),
});

if is_isolate {
valid_isolate_count += 1;
} else {
levels[idx] = new_level;
}
}

if is_isolate {
valid_isolate_count += 1;
} else {
levels[idx] = new_level;
_ => {
if is_isolate {
overflow_isolate_count += 1;
} else if overflow_isolate_count == 0 {
overflow_embedding_count += 1;
}
}
} else if is_isolate {
overflow_isolate_count += 1;
} else if overflow_isolate_count == 0 {
overflow_embedding_count += 1;
}

if !is_isolate {
Expand Down Expand Up @@ -700,8 +703,7 @@ impl Paragraph {
.unwrap_or(start_class);

let mut sequence = if start_class == Class::PDI && stack.len() > 1 {
// SAFETY: stack is already checked to be not empty
unsafe { stack.pop().unwrap_unchecked() }
stack.pop().unwrap()
} else {
Vec::with_capacity(1)
};
Expand Down
4 changes: 2 additions & 2 deletions core/src/leetspeak.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub(crate) fn find(haystack: &str, character: u32) -> Option<usize> {
_ => return None,
};

let regex = unsafe { REGEXES.as_slice().get_unchecked(idx as usize).as_ref() };
let regex = &REGEXES[idx as usize];

regex?.find(haystack).map(|mat| mat.len())
regex.as_ref()?.find(haystack).map(|mat| mat.len())
}
23 changes: 7 additions & 16 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,9 @@ const fn is_special_rtl(code: u32) -> bool {
}

fn cure_char_inner(code: u32, options: Options) -> Translation {
// SAFETY: even if there is no lowercase mapping for some codepoints, it would just return itself.
// therefore, the first iteration and/or codepoint always exists.
let code_lowercased = unsafe {
char::from_u32_unchecked(code)
.to_lowercase()
.next()
.unwrap_unchecked() as _
};
let code_lowercased = char::from_u32(code)
.and_then(|character| character.to_lowercase().next())
.unwrap() as _;

let is_case_sensitive = code != code_lowercased;

Expand Down Expand Up @@ -281,8 +276,7 @@ fn first_cure_pass(input: &str) -> (String, Vec<Class>, Vec<Paragraph>) {
_ => {}
}

// SAFETY: the only modification to this codepoint is in the if-statement above.
refined_input.push(unsafe { char::from_u32_unchecked(codepoint) });
refined_input.push(char::from_u32(codepoint).unwrap());

idx += character_len;
}
Expand Down Expand Up @@ -438,11 +432,8 @@ macro_rules! cure {
#[macro_export]
macro_rules! format {
($string:expr) => {
// SAFETY: having disable_bidi enabled removes all possibilities of an Err
unsafe {
$crate::cure($string, $crate::Options::formatter())
.unwrap_unchecked()
.into()
}
$crate::cure($string, $crate::Options::formatter())
.unwrap()
.into()
};
}
14 changes: 10 additions & 4 deletions core/src/options.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use crate::{codepoints::Codepoint, Translation};
use paste::paste;
use std::cmp::Ordering;
#[cfg(feature = "options")]
use std::mem::transmute;

/// A configuration struct where you can customize decancer's behavior.
///
Expand Down Expand Up @@ -126,11 +124,10 @@ impl Options {
}

#[cfg(feature = "options")]
#[allow(clippy::transmute_int_to_bool)]
pub(crate) const fn refuse_cure(self, attributes: u8) -> bool {
let locale = attributes >> 1;

(unsafe { transmute(attributes & 1) } && self.is(2)) || (locale > 2 && self.is(locale))
((attributes & 1) != 0 && self.is(2)) || (locale > 2 && self.is(locale))
}

pub(crate) const fn translate(self, code: u32, offset: i32, mut end: i32) -> Option<Translation> {
Expand Down Expand Up @@ -160,3 +157,12 @@ impl Options {
None
}
}

#[doc(hidden)]
#[cfg(feature = "options")]
impl From<u32> for Options {
#[inline(always)]
fn from(value: u32) -> Self {
Self(value)
}
}
23 changes: 7 additions & 16 deletions core/src/similar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ pub(crate) const SIMILAR_START: u16 = read_u16_le(unsafe { CODEPOINTS.offset(2)
pub(crate) const SIMILAR_END: u16 = read_u16_le(unsafe { CODEPOINTS.offset(4) });

pub(crate) fn is(self_char: char, other_char: char) -> bool {
// SAFETY: even if there is no lowercase mapping for some codepoints, it would just return itself.
// therefore, the first iteration and/or codepoint always exists.
let self_char = unsafe { self_char.to_lowercase().next().unwrap_unchecked() as u32 };
let other_char = unsafe { other_char.to_lowercase().next().unwrap_unchecked() as u32 };
let self_char = self_char.to_lowercase().next().unwrap() as u32;
let other_char = other_char.to_lowercase().next().unwrap() as u32;

if self_char == other_char {
return true;
Expand Down Expand Up @@ -93,8 +91,7 @@ impl<'a> Iterator for CachedPeek<'a> {
return None;
}

// SAFETY: the current index always points to an existing element
let current = unsafe { *self.cache.get_unchecked(self.index) };
let current = self.cache[self.index];
let next_element = self.next_value();

if next_element.is_none() {
Expand Down Expand Up @@ -124,26 +121,21 @@ impl<'a, 'b> Matcher<'a, 'b> {
self_str = "";
}

// SAFETY: self_str = "" would immediately cancel out the first iteration.
Self {
self_iterator: self_str.chars(),
#[cfg(feature = "leetspeak")]
self_str,
self_index: 0,
other_iterator: CachedPeek::new(other_chars, unsafe { other_first.unwrap_unchecked() }),
other_iterator: CachedPeek::new(other_chars, other_first.unwrap()),
}
}

#[cfg(feature = "leetspeak")]
fn matches_leetspeak(&mut self, other_char: char) -> Option<usize> {
// SAFETY: already guaranteed to be within the string's bounds.
let haystack = unsafe { self.self_str.get_unchecked(self.self_index..) };
let haystack = &self.self_str[self.self_index..];
let matched_len = leetspeak::find(haystack, other_char as _)?;

// SAFETY: this will never go out of bounds as well
// the furthest it would go would be an empty string.
self.self_iterator =
unsafe { self.self_str.get_unchecked(self.self_index + matched_len..) }.chars();
self.self_iterator = self.self_str[self.self_index + matched_len..].chars();

Some(matched_len)
}
Expand Down Expand Up @@ -241,8 +233,7 @@ impl<'a, 'b> Iterator for Matcher<'a, 'b> {
self.other_iterator.restart();

current_separator = None;
// SAFETY: this state of the program wouldn't be accessible if the first iteration returns a None
current_other = unsafe { self.other_iterator.next().unwrap_unchecked() };
current_other = self.other_iterator.next().unwrap();

let (skipped, matched_skip) = self.skip_until(current_other.0)?;

Expand Down
14 changes: 2 additions & 12 deletions core/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use crate::{
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use std::{
fmt::{self, Debug, Display, Formatter},
mem::transmute,
ops::{Deref, Range},
};

Expand All @@ -18,12 +17,6 @@ use std::{
pub struct CuredString(pub(crate) String);

impl CuredString {
#[deprecated(since = "3.1.2", note = "use .into() instead")]
#[inline(always)]
pub fn into_str(self) -> String {
self.into()
}

/// Iterates throughout this string and yields every similar-looking match.
///
/// If you plan on using this method with an array of strings, use [`find_multiple`][CuredString::find_multiple].
Expand Down Expand Up @@ -78,9 +71,7 @@ impl CuredString {

for mat in matches {
// SAFETY: mat is always within the mat of self
let chars = unsafe { original.get_unchecked(mat.clone()) }
.chars()
.count();
let chars = original[mat.clone()].chars().count();
let mut with_str = String::with_capacity(chars);

for _ in 0..chars {
Expand Down Expand Up @@ -224,8 +215,7 @@ impl CuredString {
impl From<CuredString> for String {
#[inline(always)]
fn from(val: CuredString) -> Self {
// SAFETY: see definition of CuredString
unsafe { transmute(val) }
val.0
}
}

Expand Down
2 changes: 1 addition & 1 deletion core/src/translation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ impl Translation {
#[cfg(feature = "options")]
pub(crate) fn into_uppercase(self) -> Self {
match self {
Self::Character(c) => Self::Character(unsafe { c.to_uppercase().next().unwrap_unchecked() }),
Self::Character(c) => Self::Character(c.to_uppercase().next().unwrap()),
Self::String(s) => Self::String(Cow::Owned(s.as_ref().to_uppercase())),
Self::None => Self::None,
}
Expand Down
4 changes: 2 additions & 2 deletions core/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ where
let mut j = 0;

for i in 1..ranges.len() {
let current = unsafe { ranges.get_unchecked(i).clone() };
let previous = unsafe { ranges.get_unchecked_mut(j) };
let current = ranges[i].clone();
let previous = &mut ranges[j];

if current.start >= previous.start && current.start <= previous.end {
previous.end = max(current.end, previous.end);
Expand Down

0 comments on commit f087966

Please sign in to comment.