Skip to content

Commit

Permalink
Remove unsafe from likely/unlikely
Browse files Browse the repository at this point in the history
For some reason, the fallback `likely`/`unlikely` functions were marked as unsafe to mimic
the intrinsics, but the intrinsics appear to always have been safe: rust-lang/rust@b778f7f
  • Loading branch information
glandium authored and hsivonen committed Aug 11, 2022
1 parent 0503648 commit d4d7d2a
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 35 deletions.
6 changes: 2 additions & 4 deletions src/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,12 @@ cfg_if! {
} else {
#[allow(dead_code)]
#[inline(always)]
// Unsafe to match the intrinsic, which is needlessly unsafe.
unsafe fn unlikely(b: bool) -> bool {
fn unlikely(b: bool) -> bool {
b
}
#[allow(dead_code)]
#[inline(always)]
// Unsafe to match the intrinsic, which is needlessly unsafe.
unsafe fn likely(b: bool) -> bool {
fn likely(b: bool) -> bool {
b
}
}
Expand Down
16 changes: 7 additions & 9 deletions src/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,11 @@ cfg_if! {
use ::core::intrinsics::unlikely;
} else {
#[inline(always)]
// Unsafe to match the intrinsic, which is needlessly unsafe.
unsafe fn likely(b: bool) -> bool {
fn likely(b: bool) -> bool {
b
}
#[inline(always)]
// Unsafe to match the intrinsic, which is needlessly unsafe.
unsafe fn unlikely(b: bool) -> bool {
fn unlikely(b: bool) -> bool {
b
}
}
Expand Down Expand Up @@ -915,7 +913,7 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
{
return true;
}
if unsafe { unlikely(second == 0x90 || second == 0x9E) } {
if unlikely(second == 0x90 || second == 0x9E) {
let third = src[read + 2];
if third >= 0xA0 {
return true;
Expand Down Expand Up @@ -1173,7 +1171,7 @@ pub fn is_str_bidi(buffer: &str) -> bool {
// Two-byte
// Adding `unlikely` here improved throughput on
// Russian plain text by 33%!
if unsafe { unlikely(byte >= 0xD6) } {
if unlikely(byte >= 0xD6) {
if byte == 0xD6 {
let second = bytes[read + 1];
if second > 0x8F {
Expand All @@ -1197,7 +1195,7 @@ pub fn is_str_bidi(buffer: &str) -> bool {
}
} else if byte < 0xF0 {
// Three-byte
if unsafe { unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) } {
if unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) {
let second = bytes[read + 1];
if byte == 0xE0 {
if second < 0xA4 {
Expand Down Expand Up @@ -1246,7 +1244,7 @@ pub fn is_str_bidi(buffer: &str) -> bool {
} else {
// Four-byte
let second = bytes[read + 1];
if unsafe { unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) } {
if unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) {
let third = bytes[read + 2];
if third >= 0xA0 {
return true;
Expand Down Expand Up @@ -1660,7 +1658,7 @@ pub fn convert_utf16_to_utf8_partial(src: &[u16], dst: &mut [u8]) -> (usize, usi
// basic blocks out-of-lined to the end of the function would wipe
// away a quarter of Arabic encode performance on Haswell!
let (read, written) = convert_utf16_to_utf8_partial_inner(src, dst);
if unsafe { likely(read == src.len()) } {
if likely(read == src.len()) {
return (read, written);
}
let (tail_read, tail_written) =
Expand Down
42 changes: 20 additions & 22 deletions src/utf_8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@ cfg_if! {
use ::core::intrinsics::likely;
} else {
#[inline(always)]
// Unsafe to match the intrinsic, which is needlessly unsafe.
unsafe fn unlikely(b: bool) -> bool {
fn unlikely(b: bool) -> bool {
b
}
#[inline(always)]
// Unsafe to match the intrinsic, which is needlessly unsafe.
unsafe fn likely(b: bool) -> bool {
fn likely(b: bool) -> bool {
b
}
}
Expand Down Expand Up @@ -88,14 +86,14 @@ pub fn utf8_valid_up_to(src: &[u8]) -> usize {
// to overflow would mean that the source slice would be so large that
// the address space of the process would not have space for any code.
// Therefore, the slice cannot be so long that this would overflow.
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
'inner: loop {
// At this point, `byte` is not included in `read`, because we
// don't yet know that a) the UTF-8 sequence is valid and b) that there
// is output space if it is an astral sequence.
// Inspecting the lead byte directly is faster than what the
// std lib does!
if unsafe { likely(in_inclusive_range8(byte, 0xC2, 0xDF)) } {
if likely(in_inclusive_range8(byte, 0xC2, 0xDF)) {
// Two-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
if !in_inclusive_range8(second, 0x80, 0xBF) {
Expand All @@ -104,7 +102,7 @@ pub fn utf8_valid_up_to(src: &[u8]) -> usize {
read += 2;

// Next lead (manually inlined)
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
read += 1;
Expand All @@ -114,7 +112,7 @@ pub fn utf8_valid_up_to(src: &[u8]) -> usize {
}
break 'inner;
}
if unsafe { likely(byte < 0xF0) } {
if likely(byte < 0xF0) {
'three: loop {
// Three-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
Expand All @@ -129,12 +127,12 @@ pub fn utf8_valid_up_to(src: &[u8]) -> usize {
read += 3;

// Next lead (manually inlined)
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if in_inclusive_range8(byte, 0xE0, 0xEF) {
continue 'three;
}
if unsafe { likely(byte < 0x80) } {
if likely(byte < 0x80) {
read += 1;
continue 'outer;
}
Expand All @@ -159,7 +157,7 @@ pub fn utf8_valid_up_to(src: &[u8]) -> usize {
read += 4;

// Next lead
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
read += 1;
Expand Down Expand Up @@ -258,7 +256,7 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
// to overflow would mean that the source slice would be so large that
// the address space of the process would not have space for any code.
// Therefore, the slice cannot be so long that this would overflow.
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
'inner: loop {
// At this point, `byte` is not included in `read`, because we
// don't yet know that a) the UTF-8 sequence is valid and b) that there
Expand All @@ -268,7 +266,7 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
// for output space in the BMP cases.
// Inspecting the lead byte directly is faster than what the
// std lib does!
if unsafe { likely(in_inclusive_range8(byte, 0xC2, 0xDF)) } {
if likely(in_inclusive_range8(byte, 0xC2, 0xDF)) {
// Two-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
if !in_inclusive_range8(second, 0x80, 0xBF) {
Expand All @@ -285,7 +283,7 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
if written == dst.len() {
break 'outer;
}
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
Expand All @@ -297,7 +295,7 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
}
break 'inner;
}
if unsafe { likely(byte < 0xF0) } {
if likely(byte < 0xF0) {
'three: loop {
// Three-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
Expand All @@ -320,12 +318,12 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
if written == dst.len() {
break 'outer;
}
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if in_inclusive_range8(byte, 0xE0, 0xEF) {
continue 'three;
}
if unsafe { likely(byte < 0x80) } {
if likely(byte < 0x80) {
unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
read += 1;
written += 1;
Expand Down Expand Up @@ -367,7 +365,7 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
if written == dst.len() {
break 'outer;
}
if unsafe { likely(read + 4 <= src.len()) } {
if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
Expand Down Expand Up @@ -654,7 +652,7 @@ pub fn convert_utf16_to_utf8_partial_inner(src: &[u16], dst: &mut [u8]) -> (usiz
break;
}
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
if unsafe { likely(unit_minus_surrogate_start > (0xDFFF - 0xD800)) } {
if likely(unit_minus_surrogate_start > (0xDFFF - 0xD800)) {
unsafe {
*(dst.get_unchecked_mut(written)) = (unit >> 12) as u8 | 0xE0u8;
written += 1;
Expand All @@ -665,7 +663,7 @@ pub fn convert_utf16_to_utf8_partial_inner(src: &[u16], dst: &mut [u8]) -> (usiz
}
break;
}
if unsafe { likely(unit_minus_surrogate_start <= (0xDBFF - 0xD800)) } {
if likely(unit_minus_surrogate_start <= (0xDBFF - 0xD800)) {
// high surrogate
// read > src.len() is impossible, but using
// >= instead of == allows the compiler to elide a bound check.
Expand All @@ -684,7 +682,7 @@ pub fn convert_utf16_to_utf8_partial_inner(src: &[u16], dst: &mut [u8]) -> (usiz
}
let second = src[read];
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
if unsafe { likely(second_minus_low_surrogate_start <= (0xDFFF - 0xDC00)) } {
if likely(second_minus_low_surrogate_start <= (0xDFFF - 0xDC00)) {
// The next code unit is a low surrogate. Advance position.
read += 1;
let astral = (u32::from(unit) << 10) + u32::from(second)
Expand Down Expand Up @@ -726,7 +724,7 @@ pub fn convert_utf16_to_utf8_partial_inner(src: &[u16], dst: &mut [u8]) -> (usiz
return (read, written);
}
unit = src[read];
if unsafe { unlikely(unit < 0x80) } {
if unlikely(unit < 0x80) {
// written > dst.len() is impossible, but using
// >= instead of == allows the compiler to elide a bound check.
if written >= dst.len() {
Expand Down

0 comments on commit d4d7d2a

Please sign in to comment.