Skip to content

Commit 77ad625

Browse files
authored
Unrolled build for rust-lang#119808
Rollup merge of rust-lang#119808 - GnomedDev:encode-charsearcher-size-in-type, r=Mark-Simulacrum. Store core::str::CharSearcher::utf8_size as u8. The code already relies on this value fitting in a u8 because of the `safety invariant: utf8_size must be less than 5`, so storing it as a u8 helps LLVM optimize and may improve copies, since the remaining space becomes padding instead of unused bytes.
2 parents 43d3470 + 601f2d1 commit 77ad625

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

library/core/src/str/pattern.rs

+23-11
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
use crate::cmp;
4242
use crate::cmp::Ordering;
43+
use crate::convert::TryInto as _;
4344
use crate::fmt;
4445
use crate::slice::memchr;
4546

@@ -370,11 +371,17 @@ pub struct CharSearcher<'a> {
370371

371372
// safety invariant: `utf8_size` must be less than 5
372373
/// The number of bytes `needle` takes up when encoded in utf8.
373-
utf8_size: usize,
374+
utf8_size: u8,
374375
/// A utf8 encoded copy of the `needle`
375376
utf8_encoded: [u8; 4],
376377
}
377378

379+
impl CharSearcher<'_> {
380+
fn utf8_size(&self) -> usize {
381+
self.utf8_size.into()
382+
}
383+
}
384+
378385
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
379386
#[inline]
380387
fn haystack(&self) -> &'a str {
@@ -414,7 +421,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
414421
let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
415422
// the last byte of the utf8 encoded needle
416423
// SAFETY: we have an invariant that `utf8_size < 5`
417-
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
424+
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
418425
if let Some(index) = memchr::memchr(last_byte, bytes) {
419426
// The new finger is the index of the byte we found,
420427
// plus one, since we memchr'd for the last byte of the character.
@@ -434,10 +441,10 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
434441
// find something. When we find something the `finger` will be set
435442
// to a UTF8 boundary.
436443
self.finger += index + 1;
437-
if self.finger >= self.utf8_size {
438-
let found_char = self.finger - self.utf8_size;
444+
if self.finger >= self.utf8_size() {
445+
let found_char = self.finger - self.utf8_size();
439446
if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
440-
if slice == &self.utf8_encoded[0..self.utf8_size] {
447+
if slice == &self.utf8_encoded[0..self.utf8_size()] {
441448
return Some((found_char, self.finger));
442449
}
443450
}
@@ -482,7 +489,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
482489
let bytes = haystack.get(self.finger..self.finger_back)?;
483490
// the last byte of the utf8 encoded needle
484491
// SAFETY: we have an invariant that `utf8_size < 5`
485-
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
492+
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
486493
if let Some(index) = memchr::memrchr(last_byte, bytes) {
487494
// we searched a slice that was offset by self.finger,
488495
// add self.finger to recoup the original index
@@ -493,14 +500,14 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
493500
// char in the paradigm of reverse iteration). For
494501
// multibyte chars we need to skip down by the number of more
495502
// bytes they have than ASCII
496-
let shift = self.utf8_size - 1;
503+
let shift = self.utf8_size() - 1;
497504
if index >= shift {
498505
let found_char = index - shift;
499-
if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
500-
if slice == &self.utf8_encoded[0..self.utf8_size] {
506+
if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size())) {
507+
if slice == &self.utf8_encoded[0..self.utf8_size()] {
501508
// move finger to before the character found (i.e., at its start index)
502509
self.finger_back = found_char;
503-
return Some((self.finger_back, self.finger_back + self.utf8_size));
510+
return Some((self.finger_back, self.finger_back + self.utf8_size()));
504511
}
505512
}
506513
}
@@ -542,7 +549,12 @@ impl<'a> Pattern<'a> for char {
542549
#[inline]
543550
fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
544551
let mut utf8_encoded = [0; 4];
545-
let utf8_size = self.encode_utf8(&mut utf8_encoded).len();
552+
let utf8_size = self
553+
.encode_utf8(&mut utf8_encoded)
554+
.len()
555+
.try_into()
556+
.expect("char len should be less than 255");
557+
546558
CharSearcher {
547559
haystack,
548560
finger: 0,

0 commit comments

Comments
 (0)