From 1368ee952a19e0740d554315d5c445bdeb55133e Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Thu, 30 Jun 2022 23:26:08 +0100 Subject: [PATCH 1/5] Add bounds checked get_unchecked, use it everywhere. --- src/avx2/deser.rs | 21 +++++----- src/charutils.rs | 34 +++++++-------- src/lib.rs | 11 +++-- src/neon/deser.rs | 33 +++++++-------- src/numberparse.rs | 93 +++++++++++++++++++++--------------------- src/safer_unchecked.rs | 37 +++++++++++++++++ src/simd128/deser.rs | 21 +++++----- src/sse42/deser.rs | 21 +++++----- src/stage2.rs | 5 ++- src/stringparse.rs | 11 ++--- src/value.rs | 3 +- src/value/borrowed.rs | 3 +- src/value/owned.rs | 3 +- 13 files changed, 174 insertions(+), 122 deletions(-) create mode 100644 src/safer_unchecked.rs diff --git a/src/avx2/deser.rs b/src/avx2/deser.rs index f4a53f44..70e52d60 100644 --- a/src/avx2/deser.rs +++ b/src/avx2/deser.rs @@ -12,6 +12,7 @@ use std::arch::x86_64::{ use std::mem; pub use crate::error::{Error, ErrorType}; +use crate::safer_unchecked::GetSaferUnchecked; use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; use crate::Deserializer; pub use crate::Result; @@ -44,7 +45,7 @@ impl<'de> Deserializer<'de> { // This is safe since we check sub's length in the range access above and only // create sub sliced form sub to `sub.len()`. - let src: &[u8] = unsafe { data.get_unchecked(idx..) }; + let src: &[u8] = unsafe { data.get_kinda_unchecked(idx..) 
}; let mut src_i: usize = 0; let mut len = src_i; loop { @@ -77,7 +78,7 @@ impl<'de> Deserializer<'de> { len += quote_dist as usize; unsafe { - let v = input.get_unchecked(idx..idx + len) as *const [u8] as *const str; + let v = input.get_kinda_unchecked(idx..idx + len) as *const [u8] as *const str; return Ok(&*v); } @@ -142,10 +143,10 @@ impl<'de> Deserializer<'de> { dst_i += quote_dist as usize; unsafe { input - .get_unchecked_mut(idx + len..idx + len + dst_i) - .clone_from_slice(buffer.get_unchecked(..dst_i)); + .get_kinda_unchecked_mut(idx + len..idx + len + dst_i) + .clone_from_slice(buffer.get_kinda_unchecked(..dst_i)); let v = - input.get_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; + input.get_kinda_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; return Ok(&*v); } @@ -155,7 +156,7 @@ impl<'de> Deserializer<'de> { if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { // find out where the backspace is let bs_dist: u32 = bs_bits.trailing_zeros(); - let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; + let escape_char: u8 = unsafe { *src.get_kinda_unchecked(src_i + bs_dist as usize + 1) }; // we encountered backslash first. Handle backslash if escape_char == b'u' { // move src/dst up to the start; they will be further adjusted @@ -163,8 +164,8 @@ impl<'de> Deserializer<'de> { src_i += bs_dist as usize; dst_i += bs_dist as usize; let (o, s) = if let Ok(r) = - handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { - buffer.get_unchecked_mut(dst_i..) + handle_unicode_codepoint(unsafe { src.get_kinda_unchecked(src_i..) }, unsafe { + buffer.get_kinda_unchecked_mut(dst_i..) }) { r } else { @@ -182,12 +183,12 @@ impl<'de> Deserializer<'de> { // note this may reach beyond the part of the buffer we've actually // seen. 
I think this is ok let escape_result: u8 = - unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; + unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) }; if escape_result == 0 { return Err(Self::raw_error(src_i, escape_char as char, InvalidEscape)); } unsafe { - *buffer.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; + *buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result; } src_i += bs_dist as usize + 2; dst_i += bs_dist as usize + 1; diff --git a/src/charutils.rs b/src/charutils.rs index 0f5707ae..f84ed34e 100644 --- a/src/charutils.rs +++ b/src/charutils.rs @@ -1,3 +1,5 @@ +use crate::safer_unchecked::GetSaferUnchecked; + const STRUCTURAL_OR_WHITESPACE_NEGATED: [u32; 256] = [ 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, @@ -22,12 +24,12 @@ const STRUCTURAL_OR_WHITESPACE: [u32; 256] = [ #[cfg_attr(not(feature = "no-inline"), inline(always))] pub fn is_not_structural_or_whitespace(c: u8) -> u32 { - unsafe { *STRUCTURAL_OR_WHITESPACE_NEGATED.get_unchecked(c as usize) } + unsafe { *STRUCTURAL_OR_WHITESPACE_NEGATED.get_kinda_unchecked(c as usize) } } #[cfg_attr(not(feature = "no-inline"), inline(always))] pub fn is_structural_or_whitespace(c: u8) -> u32 { - unsafe { *STRUCTURAL_OR_WHITESPACE.get_unchecked(c as usize) } + unsafe { *STRUCTURAL_OR_WHITESPACE.get_kinda_unchecked(c as usize) } } const DIGITTOVAL: [i8; 256] = [ @@ -55,10 +57,10 @@ pub fn hex_to_u32_nocheck(src: &[u8]) -> u32 { // invalid value. After the shifts, this will *still* result in the outcome that the high 16 bits of any // value with any invalid char will be all 1's. We check for this in the caller. 
unsafe { - let v1: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(0) as usize)); - let v2: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(1) as usize)); - let v3: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(2) as usize)); - let v4: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(3) as usize)); + let v1: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(0) as usize)); + let v2: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(1) as usize)); + let v3: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(2) as usize)); + let v4: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(3) as usize)); (v1 << 12 | v2 << 8 | v3 << 4 | v4) as u32 } } @@ -80,27 +82,27 @@ pub fn hex_to_u32_nocheck(src: &[u8]) -> u32 { pub fn codepoint_to_utf8(cp: u32, c: &mut [u8]) -> usize { unsafe { if cp <= 0x7F { - *c.get_unchecked_mut(0) = cp as u8; + *c.get_kinda_unchecked_mut(0) = cp as u8; return 1; // ascii } if cp <= 0x7FF { - *c.get_unchecked_mut(0) = ((cp >> 6) + 192) as u8; - *c.get_unchecked_mut(1) = ((cp & 63) + 128) as u8; + *c.get_kinda_unchecked_mut(0) = ((cp >> 6) + 192) as u8; + *c.get_kinda_unchecked_mut(1) = ((cp & 63) + 128) as u8; return 2; // universal plane // Surrogates are treated elsewhere... 
//} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if cp <= 0xFFFF { - *c.get_unchecked_mut(0) = ((cp >> 12) + 224) as u8; - *c.get_unchecked_mut(1) = (((cp >> 6) & 63) + 128) as u8; - *c.get_unchecked_mut(2) = ((cp & 63) + 128) as u8; + *c.get_kinda_unchecked_mut(0) = ((cp >> 12) + 224) as u8; + *c.get_kinda_unchecked_mut(1) = (((cp >> 6) & 63) + 128) as u8; + *c.get_kinda_unchecked_mut(2) = ((cp & 63) + 128) as u8; return 3; } else if cp <= 0x0010_FFFF { // if you know you have a valid code point, this is not needed - *c.get_unchecked_mut(0) = ((cp >> 18) + 240) as u8; - *c.get_unchecked_mut(1) = (((cp >> 12) & 63) + 128) as u8; - *c.get_unchecked_mut(2) = (((cp >> 6) & 63) + 128) as u8; - *c.get_unchecked_mut(3) = ((cp & 63) + 128) as u8; + *c.get_kinda_unchecked_mut(0) = ((cp >> 18) + 240) as u8; + *c.get_kinda_unchecked_mut(1) = (((cp >> 12) & 63) + 128) as u8; + *c.get_kinda_unchecked_mut(2) = (((cp >> 6) & 63) + 128) as u8; + *c.get_kinda_unchecked_mut(3) = ((cp & 63) + 128) as u8; return 4; } } diff --git a/src/lib.rs b/src/lib.rs index 55047c27..3e1e8588 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -139,6 +139,9 @@ mod macros; mod error; mod numberparse; mod stringparse; +mod safer_unchecked; + +use safer_unchecked::GetSaferUnchecked; /// Reexport of Cow pub mod cow; @@ -481,9 +484,9 @@ impl<'de> Deserializer<'de> { unsafe { input_buffer .as_mut_slice() - .get_unchecked_mut(..len) + .get_kinda_unchecked_mut(..len) .clone_from_slice(input); - *(input_buffer.get_unchecked_mut(len)) = 0; + *(input_buffer.get_kinda_unchecked_mut(len)) = 0; input_buffer.set_len(len); }; @@ -519,7 +522,7 @@ impl<'de> Deserializer<'de> { #[cfg_attr(not(feature = "no-inline"), inline(always))] pub unsafe fn next_(&mut self) -> Node<'de> { self.idx += 1; - *self.tape.get_unchecked(self.idx) + *self.tape.get_kinda_unchecked(self.idx) } //#[inline(never)] @@ -569,7 +572,7 @@ impl<'de> Deserializer<'de> { __builtin_prefetch(buf + 
idx + 128); #endif */ - let chunk = input.get_unchecked(idx..idx + 64); + let chunk = input.get_kinda_unchecked(idx..idx + 64); utf8_validator.update_from_chunks(chunk); let input = SimdInput::new(chunk); diff --git a/src/neon/deser.rs b/src/neon/deser.rs index ee0721a7..0ef043c8 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -1,3 +1,4 @@ +use crate::safer_unchecked::GetSaferUnchecked; use crate::error::ErrorType; use crate::neon::stage1::bit_mask; use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; @@ -64,14 +65,14 @@ impl<'de> Deserializer<'de> { // This is safe since we check sub's length in the range access above and only // create sub sliced form sub to `sub.len()`. - let src: &[u8] = unsafe { data.get_unchecked(idx..) }; + let src: &[u8] = unsafe { data.get_kinda_unchecked(idx..) }; let mut src_i: usize = 0; let mut len = src_i; loop { let (v0, v1) = unsafe { ( - vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()), - vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()), + vld1q_u8(src.get_kinda_unchecked(src_i..src_i + 16).as_ptr()), + vld1q_u8(src.get_kinda_unchecked(src_i + 16..src_i + 32).as_ptr()), ) }; @@ -92,7 +93,7 @@ impl<'de> Deserializer<'de> { len += quote_dist as usize; unsafe { - let v = input.get_unchecked(idx..idx + len) as *const [u8] as *const str; + let v = input.get_kinda_unchecked(idx..idx + len) as *const [u8] as *const str; return Ok(&*v); } @@ -119,15 +120,15 @@ impl<'de> Deserializer<'de> { loop { let (v0, v1) = unsafe { ( - vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()), - vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()), + vld1q_u8(src.get_kinda_unchecked(src_i..src_i + 16).as_ptr()), + vld1q_u8(src.get_kinda_unchecked(src_i + 16..src_i + 32).as_ptr()), ) }; unsafe { buffer - .get_unchecked_mut(dst_i..dst_i + 32) - .copy_from_slice(src.get_unchecked(src_i..src_i + 32)); + .get_kinda_unchecked_mut(dst_i..dst_i + 32) + .copy_from_slice(src.get_kinda_unchecked(src_i..src_i + 32)); 
} // store to dest unconditionally - we can overwrite the bits we don't like @@ -150,10 +151,10 @@ impl<'de> Deserializer<'de> { dst_i += quote_dist as usize; unsafe { input - .get_unchecked_mut(idx + len..idx + len + dst_i) - .clone_from_slice(buffer.get_unchecked(..dst_i)); + .get_kinda_unchecked_mut(idx + len..idx + len + dst_i) + .clone_from_slice(buffer.get_kinda_unchecked(..dst_i)); let v = - input.get_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; + input.get_kinda_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; return Ok(&*v); } @@ -163,7 +164,7 @@ impl<'de> Deserializer<'de> { if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { // find out where the backspace is let bs_dist: u32 = bs_bits.trailing_zeros(); - let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; + let escape_char: u8 = unsafe { *src.get_kinda_unchecked(src_i + bs_dist as usize + 1) }; // we encountered backslash first. Handle backslash if escape_char == b'u' { // move src/dst up to the start; they will be further adjusted @@ -171,8 +172,8 @@ impl<'de> Deserializer<'de> { src_i += bs_dist as usize; dst_i += bs_dist as usize; let (o, s) = if let Ok(r) = - handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { - buffer.get_unchecked_mut(dst_i..) + handle_unicode_codepoint(unsafe { src.get_kinda_unchecked(src_i..) }, unsafe { + buffer.get_kinda_unchecked_mut(dst_i..) }) { r } else { @@ -190,12 +191,12 @@ impl<'de> Deserializer<'de> { // note this may reach beyond the part of the buffer we've actually // seen. 
I think this is ok let escape_result: u8 = - unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; + unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) }; if escape_result == 0 { return Err(Self::raw_error(src_i, escape_char as char, InvalidEscape)); } unsafe { - *buffer.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; + *buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result; } src_i += bs_dist as usize + 2; dst_i += bs_dist as usize + 1; diff --git a/src/numberparse.rs b/src/numberparse.rs index 727ccbdc..d7053d9c 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -1,3 +1,4 @@ +use crate::safer_unchecked::GetSaferUnchecked; use crate::charutils::is_structural_or_whitespace; use crate::unlikely; use crate::StaticNode; @@ -98,7 +99,7 @@ const STRUCTURAL_OR_WHITESPACE_OR_EXPONENT_OR_DECIMAL_NEGATED: [bool; 256] = [ #[cfg_attr(not(feature = "no-inline"), inline(always))] fn is_not_structural_or_whitespace_or_exponent_or_decimal(c: u8) -> bool { - unsafe { *STRUCTURAL_OR_WHITESPACE_OR_EXPONENT_OR_DECIMAL_NEGATED.get_unchecked(c as usize) } + unsafe { *STRUCTURAL_OR_WHITESPACE_OR_EXPONENT_OR_DECIMAL_NEGATED.get_kinda_unchecked(c as usize) } } // #ifdef _MSC_VER @@ -145,7 +146,7 @@ fn parse_eight_digits_unrolled(chars: &[u8]) -> u32 { let input: __m128i = _mm_sub_epi8( _mm_loadu_si128( chars - .get_unchecked(0..16) + .get_kinda_unchecked(0..16) .as_ptr() .cast::(), ), @@ -194,26 +195,26 @@ impl<'de> Deserializer<'de> { let mut i: f64; let mut digit: u8; let mut d; - if unsafe { *p.get_unchecked(digitcount) } == b'0' { + if unsafe { *p.get_kinda_unchecked(digitcount) } == b'0' { // 0 cannot be followed by an integer digitcount += 1; i = 0.0; } else { - digit = unsafe { *p.get_unchecked(digitcount) } - b'0'; + digit = unsafe { *p.get_kinda_unchecked(digitcount) } - b'0'; i = f64::from(digit); digitcount += 1; - while is_integer(unsafe { *p.get_unchecked(digitcount) }) { - digit = unsafe { 
*p.get_unchecked(digitcount) } - b'0'; + while is_integer(unsafe { *p.get_kinda_unchecked(digitcount) }) { + digit = unsafe { *p.get_kinda_unchecked(digitcount) } - b'0'; i = 10.0 * i + f64::from(digit); digitcount += 1; } } - if unsafe { *p.get_unchecked(digitcount) } == b'.' { + if unsafe { *p.get_kinda_unchecked(digitcount) } == b'.' { let mut fraction: u64 = 0; let mut fraction_weight: u64 = 10; digitcount += 1; //let mut fractionalweight: f64 = 1.0; - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if is_integer(d) { digit = d - b'0'; digitcount += 1; @@ -226,10 +227,10 @@ impl<'de> Deserializer<'de> { )); } - while is_integer(unsafe { *p.get_unchecked(digitcount) }) + while is_integer(unsafe { *p.get_kinda_unchecked(digitcount) }) && fraction_weight <= 10_000_000_000_000_000_u64 { - digit = unsafe { *p.get_unchecked(digitcount) } - b'0'; + digit = unsafe { *p.get_kinda_unchecked(digitcount) } - b'0'; digitcount += 1; fraction_weight *= 10; fraction = fraction * 10 + u64::from(digit); @@ -238,26 +239,26 @@ impl<'de> Deserializer<'de> { let mut fraction_weight = fraction_weight as f64; let mut fraction = (fraction as f64) / fraction_weight; - while is_integer(unsafe { *p.get_unchecked(digitcount) }) { - digit = unsafe { *p.get_unchecked(digitcount) } - b'0'; + while is_integer(unsafe { *p.get_kinda_unchecked(digitcount) }) { + digit = unsafe { *p.get_kinda_unchecked(digitcount) } - b'0'; digitcount += 1; fraction_weight *= 10.0; fraction += f64::from(digit) / fraction_weight; } i += fraction; } - if (unsafe { *p.get_unchecked(digitcount) } == b'e') - || (unsafe { *p.get_unchecked(digitcount) } == b'E') + if (unsafe { *p.get_kinda_unchecked(digitcount) } == b'e') + || (unsafe { *p.get_kinda_unchecked(digitcount) } == b'E') { digitcount += 1; let mut negexp: bool = false; - if unsafe { *p.get_unchecked(digitcount) } == b'-' { + if unsafe { *p.get_kinda_unchecked(digitcount) } == b'-' { negexp = true; digitcount += 
1; - } else if unsafe { *p.get_unchecked(digitcount) } == b'+' { + } else if unsafe { *p.get_kinda_unchecked(digitcount) } == b'+' { digitcount += 1; } - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if !is_integer(d) { return Err(Self::raw_error( idx + digitcount, @@ -265,28 +266,28 @@ impl<'de> Deserializer<'de> { ErrorType::InvalidNumber, )); } - digit = unsafe { *p.get_unchecked(digitcount) } - b'0'; + digit = unsafe { *p.get_kinda_unchecked(digitcount) } - b'0'; let mut expnumber: u32 = u32::from(digit); // exponential part digitcount += 1; - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if is_integer(d) { digit = d - b'0'; expnumber = 10 * expnumber + u32::from(digit); digitcount += 1; } - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if is_integer(d) { digit = d - b'0'; expnumber = 10 * expnumber + u32::from(digit); digitcount += 1; } - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if is_integer(d) { digit = d - b'0'; expnumber = 10 * expnumber + u32::from(digit); digitcount += 1; } - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if is_integer(d) { // we refuse to parse this return Err(Self::raw_error( @@ -311,7 +312,7 @@ impl<'de> Deserializer<'de> { i *= POWER_OF_TEN[(323 + exponent) as usize]; } - d = unsafe { *p.get_unchecked(digitcount) }; + d = unsafe { *p.get_kinda_unchecked(digitcount) }; if is_structural_or_whitespace(d) == 0 { Err(Self::raw_error( idx + digitcount, @@ -337,18 +338,18 @@ impl<'de> Deserializer<'de> { fn parse_large_integer(idx: usize, buf: &[u8], negative: bool) -> Result { let mut digitcount = if negative { 1 } else { 0 }; let mut i: u64; - let mut d = unsafe { *buf.get_unchecked(digitcount) }; + let mut d = unsafe { *buf.get_kinda_unchecked(digitcount) }; let mut digit: u8; 
if d == b'0' { digitcount += 1; - d = unsafe { *buf.get_unchecked(digitcount) }; + d = unsafe { *buf.get_kinda_unchecked(digitcount) }; i = 0; } else { digit = d - b'0'; i = u64::from(digit); digitcount += 1; - d = unsafe { *buf.get_unchecked(digitcount) }; + d = unsafe { *buf.get_kinda_unchecked(digitcount) }; // the is_made_of_eight_digits_fast routine is unlikely to help here because // we rarely see large integer parts like 123456789 while is_integer(d) { @@ -366,7 +367,7 @@ impl<'de> Deserializer<'de> { )); } digitcount += 1; - d = unsafe { *buf.get_unchecked(digitcount) }; + d = unsafe { *buf.get_kinda_unchecked(digitcount) }; } } @@ -398,18 +399,18 @@ impl<'de> Deserializer<'de> { fn parse_large_integer(idx: usize, buf: &[u8], negative: bool) -> Result { let mut digitcount = if negative { 1 } else { 0 }; let mut i: u128; - let mut d = unsafe { *buf.get_unchecked(digitcount) }; + let mut d = unsafe { *buf.get_kinda_unchecked(digitcount) }; let mut digit: u8; if d == b'0' { digitcount += 1; - d = unsafe { *buf.get_unchecked(digitcount) }; + d = unsafe { *buf.get_kinda_unchecked(digitcount) }; i = 0; } else { digit = d - b'0'; i = u128::from(digit); digitcount += 1; - d = unsafe { *buf.get_unchecked(digitcount) }; + d = unsafe { *buf.get_kinda_unchecked(digitcount) }; // the is_made_of_eight_digits_fast routine is unlikely to help here because // we rarely see large integer parts like 123456789 while is_integer(d) { @@ -427,7 +428,7 @@ impl<'de> Deserializer<'de> { )); } digitcount += 1; - d = unsafe { *buf.get_unchecked(digitcount) }; + d = unsafe { *buf.get_kinda_unchecked(digitcount) }; } } @@ -475,12 +476,12 @@ impl<'de> Deserializer<'de> { let mut ignore_count: u8 = 0; //let startdigits: *const u8 = p; let mut i: u64; - let mut d = unsafe { *buf.get_unchecked(byte_count) }; + let mut d = unsafe { *buf.get_kinda_unchecked(byte_count) }; let mut digit: u8; if d == b'0' { // 0 cannot be followed by an integer byte_count += 1; - d = unsafe { 
*buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; if is_not_structural_or_whitespace_or_exponent_or_decimal(d) { return Err(Self::raw_error( idx + byte_count, @@ -502,7 +503,7 @@ impl<'de> Deserializer<'de> { i = u64::from(digit); byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; // the is_made_of_eight_digits_fast routine is unlikely to help here because // we rarely see large integer parts like 123456789 while is_integer(d) { @@ -519,14 +520,14 @@ impl<'de> Deserializer<'de> { } //i = 10 * i + u64::from(digit); // might overflow byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; } } let mut exponent: i64 = if d == b'.' { ignore_count += 1; byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; let firstafterperiod = byte_count; if is_integer(d) { digit = d - b'0'; @@ -547,19 +548,19 @@ impl<'de> Deserializer<'de> { // FIXME // can we omit this: buf.len() - byte_count >= 8 - if is_made_of_eight_digits_fast(unsafe { buf.get_unchecked(byte_count..) }) { + if is_made_of_eight_digits_fast(unsafe { buf.get_kinda_unchecked(byte_count..) }) { i = i.wrapping_mul(100_000_000).wrapping_add(u64::from( - parse_eight_digits_unrolled(unsafe { buf.get_unchecked(byte_count..) }), + parse_eight_digits_unrolled(unsafe { buf.get_kinda_unchecked(byte_count..) 
}), )); byte_count += 8; } } - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; while is_integer(d) { digit = d - b'0'; i = i.wrapping_mul(10).wrapping_add(u64::from(digit)); byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; } firstafterperiod as i64 - byte_count as i64 } else { @@ -569,17 +570,17 @@ impl<'de> Deserializer<'de> { if (d == b'e') || (d == b'E') { ignore_count += 1; byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; let mut negexp: bool = false; if d == b'-' { negexp = true; ignore_count += 1; byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; } else if d == b'+' { ignore_count += 1; byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; } if !is_integer(d) { return Err(Self::raw_error( @@ -592,20 +593,20 @@ impl<'de> Deserializer<'de> { expnumber = i16::from(digit); byte_count += 1; ignore_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; if is_integer(d) { digit = d - b'0'; expnumber = 10 * expnumber + i16::from(digit); ignore_count += 1; byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; } if is_integer(d) { digit = d - b'0'; expnumber = 10 * expnumber + i16::from(digit); ignore_count += 1; byte_count += 1; - d = unsafe { *buf.get_unchecked(byte_count) }; + d = unsafe { *buf.get_kinda_unchecked(byte_count) }; } if is_integer(d) { // we refuse to parse this
diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs
new file mode 100644
index 00000000..60b89602
--- /dev/null
+++ b/src/safer_unchecked.rs
@@ -0,0 +1,59 @@
+//! Debug-checked stand-ins for `get_unchecked` and `get_unchecked_mut`.
+
+use core::slice::SliceIndex;
+
+/// Slice access that is bounds-checked when `debug_assertions` is enabled and
+/// unchecked otherwise.
+pub trait GetSaferUnchecked<T> {
+    /// Returns a shared reference to the element or subslice selected by `index`.
+    ///
+    /// # Safety
+    ///
+    /// `index` must be in bounds for `self`. With `debug_assertions` enabled an
+    /// out-of-bounds `index` panics; otherwise the access is forwarded to
+    /// `get_unchecked`, where an out-of-bounds `index` is undefined behavior.
+    unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
+    where
+        I: SliceIndex<[T]>;
+
+    /// Returns a mutable reference to the element or subslice selected by `index`.
+    ///
+    /// # Safety
+    ///
+    /// `index` must be in bounds for `self`. With `debug_assertions` enabled an
+    /// out-of-bounds `index` panics; otherwise the access is forwarded to
+    /// `get_unchecked_mut`, where an out-of-bounds `index` is undefined behavior.
+    unsafe fn get_kinda_unchecked_mut<I>(&mut self, index: I) -> &mut <I as SliceIndex<[T]>>::Output
+    where
+        I: SliceIndex<[T]>;
+}
+
+impl<T> GetSaferUnchecked<T> for [T] {
+    #[inline(always)]
+    unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
+    where
+        I: SliceIndex<[T]>,
+    {
+        if cfg!(debug_assertions) {
+            // Checked indexing: a violated caller contract panics instead of being UB.
+            &self[index]
+        } else {
+            // Relies on the caller's guarantee that `index` is in bounds.
+            self.get_unchecked(index)
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn get_kinda_unchecked_mut<I>(&mut self, index: I) -> &mut <I as SliceIndex<[T]>>::Output
+    where
+        I: SliceIndex<[T]>,
+    {
+        if cfg!(debug_assertions) {
+            // Checked indexing: a violated caller contract panics instead of being UB.
+            &mut self[index]
+        } else {
+            // Relies on the caller's guarantee that `index` is in bounds.
+            self.get_unchecked_mut(index)
+        }
+    }
+}
diff --git a/src/simd128/deser.rs b/src/simd128/deser.rs index 0d562a92..7f0455ad 100644 --- a/src/simd128/deser.rs +++ b/src/simd128/deser.rs @@ -6,6 +6,7 @@ pub use crate::{ }; use crate::{ stringparse::{handle_unicode_codepoint, ESCAPE_MAP}, + safer_unchecked::GetSaferUnchecked, Deserializer, }; @@ -34,7 +35,7 @@ impl<'de> Deserializer<'de> { // This is safe since we check sub's length in the range access above and only // create sub sliced form sub to `sub.len()`. - let src = unsafe { data.get_unchecked(idx..) }; + let src = unsafe { data.get_kinda_unchecked(idx..)
}; let mut src_i = 0; let mut len = src_i; loop { @@ -57,7 +58,7 @@ impl<'de> Deserializer<'de> { len += quote_dist as usize; unsafe { - let v = input.get_unchecked(idx..idx + len) as *const [u8] as *const str; + let v = input.get_kinda_unchecked(idx..idx + len) as *const [u8] as *const str; return Ok(&*v); } @@ -108,10 +109,10 @@ impl<'de> Deserializer<'de> { dst_i += quote_dist as usize; unsafe { input - .get_unchecked_mut(idx + len..idx + len + dst_i) - .clone_from_slice(buffer.get_unchecked(..dst_i)); + .get_kinda_unchecked_mut(idx + len..idx + len + dst_i) + .clone_from_slice(buffer.get_kinda_unchecked(..dst_i)); let v = - input.get_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; + input.get_kinda_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; return Ok(&*v); } @@ -121,7 +122,7 @@ impl<'de> Deserializer<'de> { if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { // find out where the backspace is let bs_dist = bs_bits.trailing_zeros(); - let escape_char = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; + let escape_char = unsafe { *src.get_kinda_unchecked(src_i + bs_dist as usize + 1) }; // we encountered backslash first. Handle backslash if escape_char == b'u' { // move src/dst up to the start; they will be further adjusted @@ -129,8 +130,8 @@ impl<'de> Deserializer<'de> { src_i += bs_dist as usize; dst_i += bs_dist as usize; let (o, s) = if let Ok(r) = - handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { - buffer.get_unchecked_mut(dst_i..) + handle_unicode_codepoint(unsafe { src.get_kinda_unchecked(src_i..) }, unsafe { + buffer.get_kinda_unchecked_mut(dst_i..) }) { r } else { @@ -147,12 +148,12 @@ impl<'de> Deserializer<'de> { // write bs_dist+1 characters to output // note this may reach beyond the part of the buffer we've actually // seen. 
I think this is ok - let escape_result = unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; + let escape_result = unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) }; if escape_result == 0 { return Err(Self::raw_error(src_i, escape_char as char, InvalidEscape)); } unsafe { - *buffer.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; + *buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result; } src_i += bs_dist as usize + 2; dst_i += bs_dist as usize + 1; diff --git a/src/sse42/deser.rs b/src/sse42/deser.rs index e2760ae2..bf4f3132 100644 --- a/src/sse42/deser.rs +++ b/src/sse42/deser.rs @@ -11,6 +11,7 @@ use std::mem; pub use crate::error::{Error, ErrorType}; use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; +use crate::safer_unchecked::GetSaferUnchecked; use crate::Deserializer; pub use crate::Result; @@ -39,7 +40,7 @@ impl<'de> Deserializer<'de> { // This is safe since we check sub's length in the range access above and only // create sub sliced form sub to `sub.len()`. - let src: &[u8] = unsafe { data.get_unchecked(idx..) }; + let src: &[u8] = unsafe { data.get_kinda_unchecked(idx..) 
}; let mut src_i: usize = 0; let mut len = src_i; loop { @@ -72,7 +73,7 @@ impl<'de> Deserializer<'de> { len += quote_dist as usize; unsafe { - let v = input.get_unchecked(idx..idx + len) as *const [u8] as *const str; + let v = input.get_kinda_unchecked(idx..idx + len) as *const [u8] as *const str; return Ok(&*v); } @@ -137,10 +138,10 @@ impl<'de> Deserializer<'de> { dst_i += quote_dist as usize; unsafe { input - .get_unchecked_mut(idx + len..idx + len + dst_i) - .clone_from_slice(buffer.get_unchecked(..dst_i)); + .get_kinda_unchecked_mut(idx + len..idx + len + dst_i) + .clone_from_slice(buffer.get_kinda_unchecked(..dst_i)); let v = - input.get_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; + input.get_kinda_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str; return Ok(&*v); } @@ -150,7 +151,7 @@ impl<'de> Deserializer<'de> { if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { // find out where the backspace is let bs_dist: u32 = bs_bits.trailing_zeros(); - let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; + let escape_char: u8 = unsafe { *src.get_kinda_unchecked(src_i + bs_dist as usize + 1) }; // we encountered backslash first. Handle backslash if escape_char == b'u' { // move src/dst up to the start; they will be further adjusted @@ -158,8 +159,8 @@ impl<'de> Deserializer<'de> { src_i += bs_dist as usize; dst_i += bs_dist as usize; let (o, s) = if let Ok(r) = - handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { - buffer.get_unchecked_mut(dst_i..) + handle_unicode_codepoint(unsafe { src.get_kinda_unchecked(src_i..) }, unsafe { + buffer.get_kinda_unchecked_mut(dst_i..) }) { r } else { @@ -177,12 +178,12 @@ impl<'de> Deserializer<'de> { // note this may reach beyond the part of the buffer we've actually // seen. 
I think this is ok let escape_result: u8 = - unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; + unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) }; if escape_result == 0 { return Err(Self::raw_error(src_i, escape_char as char, InvalidEscape)); } unsafe { - *buffer.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; + *buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result; } src_i += bs_dist as usize + 2; dst_i += bs_dist as usize + 1; diff --git a/src/stage2.rs b/src/stage2.rs index 574539aa..b048bacf 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -2,6 +2,7 @@ use crate::charutils::is_not_structural_or_whitespace; use crate::value::tape::Node; use crate::{Deserializer, Error, ErrorType, Result}; +use crate::safer_unchecked::GetSaferUnchecked; use value_trait::StaticNode; #[cfg_attr(not(feature = "no-inline"), inline(always))] @@ -20,7 +21,7 @@ pub fn is_valid_true_atom(loc: &[u8]) -> bool { let locval: u64 = *(loc.as_ptr().cast::()); error = (locval & MASK4) ^ TV; - error |= u64::from(is_not_structural_or_whitespace(*loc.get_unchecked(4))); + error |= u64::from(is_not_structural_or_whitespace(*loc.get_kinda_unchecked(4))); } error == 0 } @@ -35,7 +36,7 @@ macro_rules! get { #[cfg(not(feature = "safe"))] macro_rules! 
get { ($a:expr, $i:expr) => {{ - unsafe { $a.get_unchecked($i) } + unsafe { $a.get_kinda_unchecked($i) } }}; } diff --git a/src/stringparse.rs b/src/stringparse.rs index d2782068..e88cbc24 100644 --- a/src/stringparse.rs +++ b/src/stringparse.rs @@ -1,5 +1,6 @@ use std::ops::Range; +use crate::safer_unchecked::GetSaferUnchecked; use crate::charutils::{codepoint_to_utf8, hex_to_u32_nocheck}; use crate::error::ErrorType; @@ -38,19 +39,19 @@ pub(crate) fn handle_unicode_codepoint( // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check - let mut code_point: u32 = hex_to_u32_nocheck(unsafe { src_ptr.get_unchecked(2..) }); - src_ptr = unsafe { src_ptr.get_unchecked(6..) }; + let mut code_point: u32 = hex_to_u32_nocheck(unsafe { src_ptr.get_kinda_unchecked(2..) }); + src_ptr = unsafe { src_ptr.get_kinda_unchecked(6..) }; let mut src_offset = 6; // check for low surrogate for characters outside the Basic // Multilingual Plane. if HIGH_SURROGATES.contains(&code_point) { - if (unsafe { *src_ptr.get_unchecked(0) } != b'\\') - || unsafe { *src_ptr.get_unchecked(1) } != b'u' + if (unsafe { *src_ptr.get_kinda_unchecked(0) } != b'\\') + || unsafe { *src_ptr.get_kinda_unchecked(1) } != b'u' { return Ok((0, src_offset)); } - let code_point_2: u32 = hex_to_u32_nocheck(unsafe { src_ptr.get_unchecked(2..) }); + let code_point_2: u32 = hex_to_u32_nocheck(unsafe { src_ptr.get_kinda_unchecked(2..) }); // if the first code point is invalid we will get here, as we will go past // the check for being outside the Basic Multilingual plane. 
If we don't diff --git a/src/value.rs b/src/value.rs index 5270dc68..7a85ef93 100644 --- a/src/value.rs +++ b/src/value.rs @@ -65,6 +65,7 @@ pub use self::owned::{ Value as OwnedValue, }; use crate::{Deserializer, Result}; +use crate::safer_unchecked::GetSaferUnchecked; use halfbrown::HashMap; use std::hash::Hash; use std::marker::PhantomData; @@ -131,7 +132,7 @@ where unsafe { res.set_len(len); for i in 0..len { - std::ptr::write(res.get_unchecked_mut(i), self.parse()); + std::ptr::write(res.get_kinda_unchecked_mut(i), self.parse()); } } Value::from(res) diff --git a/src/value/borrowed.rs b/src/value/borrowed.rs index a3a154c4..8280b214 100644 --- a/src/value/borrowed.rs +++ b/src/value/borrowed.rs @@ -27,6 +27,7 @@ mod serialize; use crate::cow::Cow; use crate::prelude::*; use crate::{AlignedBuf, Deserializer, Node, Result, StaticNode}; +use crate::safer_unchecked::GetSaferUnchecked; use halfbrown::HashMap; use std::fmt; use std::ops::{Index, IndexMut}; @@ -385,7 +386,7 @@ impl<'de> BorrowDeserializer<'de> { unsafe { res.set_len(len); for i in 0..len { - std::ptr::write(res.get_unchecked_mut(i), self.parse()); + std::ptr::write(res.get_kinda_unchecked_mut(i), self.parse()); } } Value::Array(res) diff --git a/src/value/owned.rs b/src/value/owned.rs index 8c1ab032..a1201f1a 100644 --- a/src/value/owned.rs +++ b/src/value/owned.rs @@ -25,6 +25,7 @@ mod serialize; use crate::prelude::*; use crate::{AlignedBuf, Deserializer, Node, Result, StaticNode}; +use crate::safer_unchecked::GetSaferUnchecked; use halfbrown::HashMap; use std::fmt; use std::ops::{Index, IndexMut}; @@ -323,7 +324,7 @@ impl<'de> OwnedDeserializer<'de> { unsafe { res.set_len(len); for i in 0..len { - std::ptr::write(res.get_unchecked_mut(i), self.parse()); + std::ptr::write(res.get_kinda_unchecked_mut(i), self.parse()); } } Value::Array(res) From 97a0392b546704d7b5938650b4c6659cf660d443 Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Fri, 1 Jul 2022 00:00:43 +0100 Subject: [PATCH 
2/5] Fix unchecked indexing in from_slice_with_buffers --- src/lib.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3e1e8588..02cd1596 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -482,11 +482,14 @@ impl<'de> Deserializer<'de> { } unsafe { - input_buffer - .as_mut_slice() - .get_kinda_unchecked_mut(..len) - .clone_from_slice(input); - *(input_buffer.get_kinda_unchecked_mut(len)) = 0; + std::ptr::copy_nonoverlapping( + input.as_ptr(), + input_buffer.as_mut_ptr(), + len, + ); + + input_buffer.as_mut_ptr().add(len).write(0); + input_buffer.set_len(len); }; @@ -720,15 +723,16 @@ impl AlignedBuf { } } + fn as_mut_ptr(&mut self) -> *mut u8 { + self.inner.as_ptr() + } + fn capacity_overflow() -> ! { panic!("capacity overflow"); } fn capacity(&self) -> usize { self.capacity } - fn as_mut_slice(&mut self) -> &mut [u8] { - unsafe { std::slice::from_raw_parts_mut(self.inner.as_ptr(), self.len) } - } unsafe fn set_len(&mut self, n: usize) { assert!( n <= self.capacity, From b85e280b72d0d043c0d35b8ddaee348a6a0ca854 Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Fri, 1 Jul 2022 10:23:03 +0100 Subject: [PATCH 3/5] Add debug assertion --- src/stage2.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/stage2.rs b/src/stage2.rs index b048bacf..4c7e9aaa 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -8,6 +8,8 @@ use value_trait::StaticNode; #[cfg_attr(not(feature = "no-inline"), inline(always))] #[allow(clippy::cast_ptr_alignment)] pub fn is_valid_true_atom(loc: &[u8]) -> bool { + debug_assert!(loc.len() >= 8, "input too short to safely read a u64 from"); + // TODO is this expensive? 
let mut error: u64; unsafe { From 65d30a6dacad78390ce5c8ba07c77e9ec3f9c0b3 Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Sat, 2 Jul 2022 11:22:30 +0100 Subject: [PATCH 4/5] Use correct input to find structural bits --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 02cd1596..f3c0666e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,5 @@ #![deny(warnings)] #![cfg_attr(feature = "hints", feature(core_intrinsics))] -#![deny(warnings)] #![warn(unused_extern_crates)] #![deny( clippy::all, @@ -488,13 +487,14 @@ impl<'de> Deserializer<'de> { len, ); - input_buffer.as_mut_ptr().add(len).write(0); + let to_fill = input_buffer.capacity() - len; + std::ptr::write_bytes(input_buffer.as_mut_ptr().add(len), 0, to_fill); - input_buffer.set_len(len); + input_buffer.set_len(input_buffer.capacity()); }; let s1_result: std::result::Result<Vec<u32>, ErrorType> = - unsafe { Self::find_structural_bits(input_buffer) }; + unsafe { Self::find_structural_bits(input) }; let structural_indexes = match s1_result { Ok(i) => i, From 0fdcdae1902aef5004229e8e91e73b01c4ab5a97 Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Sat, 2 Jul 2022 11:47:14 +0100 Subject: [PATCH 5/5] Add some more debug asserts, fix misaligned reads --- src/stage2.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/stage2.rs b/src/stage2.rs index 4c7e9aaa..41829bcf 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -8,7 +8,7 @@ use value_trait::StaticNode; #[cfg_attr(not(feature = "no-inline"), inline(always))] #[allow(clippy::cast_ptr_alignment)] pub fn is_valid_true_atom(loc: &[u8]) -> bool { - debug_assert!(loc.len() >= 8, "input too short to safely read a u64 from"); + debug_assert!(loc.len() >= 8, "loc too short for a u64 read"); // TODO is this expensive? 
let mut error: u64; @@ -20,7 +20,7 @@ pub fn is_valid_true_atom(loc: &[u8]) -> bool { // TODO: does this has the same effect as: // std::memcpy(&locval, loc, sizeof(uint64_t)); - let locval: u64 = *(loc.as_ptr().cast::<u64>()); + let locval: u64 = loc.as_ptr().cast::<u64>().read_unaligned(); error = (locval & MASK4) ^ TV; error |= u64::from(is_not_structural_or_whitespace(*loc.get_kinda_unchecked(4))); @@ -45,6 +45,8 @@ macro_rules! get { #[cfg_attr(not(feature = "no-inline"), inline(always))] #[allow(clippy::cast_ptr_alignment, unused_unsafe)] pub fn is_valid_false_atom(loc: &[u8]) -> bool { + debug_assert!(loc.len() >= 8, "loc too short for a u64 read"); + // TODO: this is ugly and probably copies data every time let mut error: u64; unsafe { @@ -54,7 +56,7 @@ pub fn is_valid_false_atom(loc: &[u8]) -> bool { const FV: u64 = 0x00_00_00_65_73_6c_61_66; const MASK5: u64 = 0x00_00_00_ff_ff_ff_ff_ff; - let locval: u64 = *(loc.as_ptr().cast::<u64>()); + let locval: u64 = loc.as_ptr().cast::<u64>().read_unaligned(); // FIXME the original code looks like this: // error = ((locval & mask5) ^ fv) as u32; @@ -70,6 +72,8 @@ pub fn is_valid_false_atom(loc: &[u8]) -> bool { #[cfg_attr(not(feature = "no-inline"), inline(always))] #[allow(clippy::cast_ptr_alignment, unused_unsafe)] pub fn is_valid_null_atom(loc: &[u8]) -> bool { + debug_assert!(loc.len() >= 8, "loc too short for a u64 read"); + // TODO is this expensive? let mut error: u64; unsafe { @@ -77,7 +81,7 @@ pub fn is_valid_null_atom(loc: &[u8]) -> bool { // this is the same: const NV: u64 = 0x00_00_00_00_6c_6c_75_6e; const MASK4: u64 = 0x00_00_00_00_ff_ff_ff_ff; - let locval: u64 = *(loc.as_ptr().cast::<u64>()); + let locval: u64 = loc.as_ptr().cast::<u64>().read_unaligned(); error = (locval & MASK4) ^ NV; error |= u64::from(is_not_structural_or_whitespace(*get!(loc, 4)));