Skip to content

Commit

Permalink
refactor: remove all unsafe code in rust, wasm, and node crates (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
null8626 committed Oct 28, 2024
1 parent 7d65bf3 commit 354810f
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 58 deletions.
12 changes: 6 additions & 6 deletions bindings/node/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![allow(clippy::inherent_to_string)]
#![forbid(unsafe_code)]

#[macro_use]
extern crate napi_derive;
Expand Down Expand Up @@ -30,18 +30,18 @@ macro_rules! options {
)*
}

impl Into<u32> for Options {
fn into(self) -> u32 {
impl From<Options> for u32 {
fn from(value: Options) -> u32 {
let mut options = 0;

$(
if self.$key_name.unwrap_or_default() {
if value.$key_name.unwrap_or_default() {
options |= (1 << $key_idx);
}
)*

$(
if self.$override_name.unwrap_or_default() {
if value.$override_name.unwrap_or_default() {
options = $override_value;
}
)*
Expand Down Expand Up @@ -129,7 +129,7 @@ impl CuredString {
pub fn find_multiple(&self, other: Vec<String>) -> Vec<Match> {
self
.0
.find_multiple(&other)
.find_multiple(other)
.into_iter()
.map(|mat| self.new_match(mat))
.collect()
Expand Down
1 change: 1 addition & 0 deletions bindings/wasm/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![allow(non_snake_case)]
#![forbid(unsafe_code)]

use std::{convert::AsRef, ops::Range};
use wasm_bindgen::prelude::*;
Expand Down
9 changes: 4 additions & 5 deletions core/src/bidi/brackets.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::{BIDI, BIDI_BRACKETS_COUNT};
use crate::util::{read_u16_le, read_u32_le, CODEPOINT_MASK};
use crate::util::CODEPOINT_MASK;

pub(crate) struct BracketPair {
pub(crate) start: usize,
Expand Down Expand Up @@ -30,11 +30,10 @@ impl OpeningBracket {

while start <= end {
let mid = (start + end) / 2;
let offset = (4 + (mid * 5)) as isize;
let offset = (4 + (mid * 5)) as _;

let first = read_u32_le(unsafe { BIDI.offset(offset) });
let opening =
((read_u16_le(unsafe { BIDI.offset(offset + 4) }) as u32) << 8) | ((first >> 20) & 0xff);
let first = BIDI.u32_at(offset);
let opening = ((BIDI.u16_at(offset + 4) as u32) << 8) | ((first >> 20) & 0xff);

let diff = (first >> 28) & 7;

Expand Down
9 changes: 4 additions & 5 deletions core/src/bidi/class.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::{OverrideStatus, BIDI, BIDI_DICTIONARY_COUNT, BIDI_DICTIONARY_OFFSET};
use crate::util::{numbered_enum, read_u16_le, read_u32_le, CODEPOINT_MASK};
use crate::util::{numbered_enum, CODEPOINT_MASK};

numbered_enum! {
#[allow(dead_code)]
Expand Down Expand Up @@ -38,15 +38,14 @@ impl Class {

while start <= end {
let mid = (start + end) / 2;
let offset = ((BIDI_DICTIONARY_OFFSET as i32) + (mid * 6)) as isize;

let kv = read_u32_le(unsafe { BIDI.offset(offset) });
let offset = ((BIDI_DICTIONARY_OFFSET as i32) + (mid * 6)) as _;
let kv = BIDI.u32_at(offset);

let other = kv & CODEPOINT_MASK;

if code < other {
end = mid - 1;
} else if code > (other + read_u16_le(unsafe { BIDI.offset(offset + 4) }) as u32) {
} else if code > (other + BIDI.u16_at(offset + 4) as u32) {
start = mid + 1;
} else {
return Some(((kv >> 20) as u8).into());
Expand Down
8 changes: 4 additions & 4 deletions core/src/bidi/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
mod class;

use crate::util::read_u16_le;
use crate::util::Binary;
pub(crate) use class::Class;

const BIDI: *const u8 = include_bytes!("../../bin/bidi.bin").as_ptr();
const BIDI: Binary<'static> = Binary::new(include_bytes!("../../bin/bidi.bin"));

const BIDI_DICTIONARY_OFFSET: u16 = read_u16_le(BIDI);
const BIDI_DICTIONARY_COUNT: u16 = unsafe { read_u16_le(BIDI.offset(2)) };
const BIDI_DICTIONARY_OFFSET: u16 = BIDI.u16_at(0);
const BIDI_DICTIONARY_COUNT: u16 = BIDI.u16_at(2);
const BIDI_BRACKETS_COUNT: u16 = ((BIDI_DICTIONARY_OFFSET - 4) / 5) - 1;

mod brackets;
Expand Down
20 changes: 9 additions & 11 deletions core/src/codepoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ use crate::Options;
use crate::{
similar::SIMILAR_START,
translation::Translation,
util::{read_u16_le, read_u32_le, CODEPOINT_MASK},
util::{Binary, CODEPOINT_MASK},
};
use std::cmp::Ordering;

pub(crate) const CODEPOINTS: *const u8 = include_bytes!("../bin/codepoints.bin").as_ptr();
pub(crate) const CODEPOINTS: Binary<'static> = Binary::new(include_bytes!("../bin/codepoints.bin"));

pub(crate) const CASE_SENSITIVE_CODEPOINTS_COUNT: u16 =
((SIMILAR_START - CASE_SENSITIVE_CODEPOINTS_OFFSET) / 6) - 1;
pub(crate) const CASE_SENSITIVE_CODEPOINTS_OFFSET: u16 = read_u16_le(CODEPOINTS);
pub(crate) const CASE_SENSITIVE_CODEPOINTS_OFFSET: u16 = CODEPOINTS.u16_at(0);
pub(crate) const CODEPOINTS_COUNT: u16 = ((CASE_SENSITIVE_CODEPOINTS_OFFSET - 6) / 6) - 1;

const RANGE_MASK: u32 = 0x0800_0000;
Expand Down Expand Up @@ -46,13 +46,11 @@ impl Codepoint {
}

pub(crate) const fn at(offset: i32) -> Self {
unsafe {
Self(
read_u32_le(CODEPOINTS.offset(offset as _)),
*CODEPOINTS.offset((4 + offset) as _),
*CODEPOINTS.offset((5 + offset) as _),
)
}
Self(
CODEPOINTS.u32_at(offset as _),
CODEPOINTS.at((4 + offset) as _),
CODEPOINTS.at((5 + offset) as _),
)
}

pub(crate) const fn matches(
Expand Down Expand Up @@ -80,7 +78,7 @@ impl Codepoint {
Some(Ordering::Equal)
}

pub(crate) const fn translation(self, other: u32) -> Translation {
pub(crate) fn translation(self, other: u32) -> Translation {
if self.is_string_translation() {
Translation::string(self.0, self.1)
} else {
Expand Down
3 changes: 2 additions & 1 deletion core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![doc = include_str!("../README.md")]
#![allow(clippy::upper_case_acronyms)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![forbid(unsafe_code)]

mod bidi;
mod codepoints;
Expand Down Expand Up @@ -434,6 +435,6 @@ macro_rules! format {
// Cures `$string` with the formatter options and converts the result into an
// owned `String`, panicking (via `unwrap`) if curing fails.
//
// `Into::into` has no generic parameter of its own (the target type is a
// parameter of the `Into` trait), so `.into::<String>()` is rejected by the
// compiler. Naming the target through the trait path keeps the result pinned
// to `String` and compiles.
($string:expr) => {
  ::std::convert::Into::<::std::string::String>::into(
    $crate::cure($string, $crate::Options::formatter()).unwrap(),
  )
};
}
2 changes: 1 addition & 1 deletion core/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ impl Options {
((attributes & 1) != 0 && self.is(2)) || (locale > 2 && self.is(locale))
}

pub(crate) const fn translate(self, code: u32, offset: i32, mut end: i32) -> Option<Translation> {
pub(crate) fn translate(self, code: u32, offset: i32, mut end: i32) -> Option<Translation> {
let mut start = 0;

while start <= end {
Expand Down
11 changes: 4 additions & 7 deletions core/src/similar.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
#[cfg(feature = "leetspeak")]
use crate::leetspeak;
use crate::{
codepoints::CODEPOINTS,
util::{read_u16_le, unwrap_or_ret},
};
use crate::{codepoints::CODEPOINTS, util::unwrap_or_ret};
use std::{iter::FusedIterator, ops::Range, str::Chars};

pub(crate) const SIMILAR_START: u16 = read_u16_le(unsafe { CODEPOINTS.offset(2) });
pub(crate) const SIMILAR_END: u16 = read_u16_le(unsafe { CODEPOINTS.offset(4) });
pub(crate) const SIMILAR_START: u16 = CODEPOINTS.u16_at(2);
pub(crate) const SIMILAR_END: u16 = CODEPOINTS.u16_at(4);

pub(crate) fn is(self_char: char, other_char: char) -> bool {
let self_char = self_char.to_lowercase().next().unwrap() as u32;
Expand All @@ -20,7 +17,7 @@ pub(crate) fn is(self_char: char, other_char: char) -> bool {
let mut contains_b = false;

for offset in SIMILAR_START..SIMILAR_END {
let cur = unsafe { *(CODEPOINTS.offset(offset as _)) };
let cur = CODEPOINTS.at(offset as _);
let sim = cur & 0x7f;

if sim == (self_char as u8) {
Expand Down
25 changes: 11 additions & 14 deletions core/src/translation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ use std::{
borrow::Cow,
cmp::PartialEq,
fmt::{self, Debug, Display},
mem::transmute,
ops::AddAssign,
slice, str,
str,
};

/// The translation for a single character/codepoint.
Expand All @@ -27,21 +26,19 @@ pub enum Translation {
}

impl Translation {
pub(crate) const fn string(integer: u32, second_byte: u8) -> Self {
unsafe {
let string = str::from_utf8_unchecked(slice::from_raw_parts(
CODEPOINTS.offset(
(STRINGS_OFFSET + (((((integer >> 20) as u16) & 0x07) << 8) | (second_byte as u16))) as _,
),
pub(crate) fn string(integer: u32, second_byte: u8) -> Self {
Self::String(Cow::Borrowed(
str::from_utf8(CODEPOINTS.sliced(
(STRINGS_OFFSET + (((((integer >> 20) as u16) & 0x07) << 8) | (second_byte as u16))) as _,
((integer >> 23) & 0x1f) as _,
));

Self::String(Cow::Borrowed(string))
}
))
.unwrap(),
))
}

pub(crate) const fn character(code: u32) -> Self {
Self::Character(unsafe { transmute(code) })
#[inline(always)]
pub(crate) fn character(code: u32) -> Self {
Self::Character(char::from_u32(code).unwrap())
}

#[cfg(feature = "options")]
Expand Down
33 changes: 29 additions & 4 deletions core/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,37 @@ use std::{

pub(crate) const CODEPOINT_MASK: u32 = 0x000f_ffff;

pub(crate) const fn read_u32_le(ptr: *const u8) -> u32 {
unsafe { u32::from_le_bytes([*ptr, *ptr.offset(1), *ptr.offset(2), *ptr.offset(3)]) }
/// A copyable view over a borrowed byte buffer.
///
/// The view only stores the slice reference, so copying it never copies the
/// underlying bytes.
#[derive(Clone, Copy)]
pub(crate) struct Binary<'data> {
    /// The borrowed bytes this view reads from.
    bytes: &'data [u8],
}

pub(crate) const fn read_u16_le(ptr: *const u8) -> u16 {
unsafe { u16::from_le_bytes([*ptr, *ptr.offset(1)]) }
impl<'a> Binary<'a> {
pub(crate) const fn new(bytes: &'a [u8]) -> Self {
Self { bytes }
}

pub(crate) const fn at(self, offset: usize) -> u8 {
self.bytes[offset]
}

#[inline(always)]
pub(crate) fn sliced(self, offset: usize, size: usize) -> &'a [u8] {
&self.bytes[offset..offset + size]
}

pub(crate) const fn u16_at(self, offset: usize) -> u16 {
u16::from_le_bytes([self.at(offset), self.at(offset + 1)])
}

pub(crate) const fn u32_at(self, offset: usize) -> u32 {
u32::from_le_bytes([
self.at(offset),
self.at(offset + 1),
self.at(offset + 2),
self.at(offset + 3),
])
}
}

#[inline(always)]
Expand Down

0 comments on commit 354810f

Please sign in to comment.