Skip to content

Commit a2f180d

Browse files
committed
Auto merge of rust-lang#125317 - joboet:optimize_escape, r=<try>
Optimize `EscapeIterInner`. This optimizes `EscapeIterInner` by using `MaybeUninit` for unused array elements instead of initializing them with `ascii::Char::Null`. Follow-up to rust-lang#124307. CC `@reitermarkus`
2 parents e8ada6a + cc56a48 commit a2f180d

File tree

1 file changed

+77
-91
lines changed

1 file changed

+77
-91
lines changed

library/core/src/escape.rs

+77-91
Original file line numberDiff line numberDiff line change
@@ -1,125 +1,107 @@
11
//! Helper code for character escaping.
22
33
use crate::ascii;
4+
use crate::mem::MaybeUninit;
45
use crate::num::NonZero;
56
use crate::ops::Range;
67

78
const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();
89

9-
#[inline]
10-
const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
11-
const { assert!(N >= 2) };
12-
13-
let mut output = [ascii::Char::Null; N];
14-
15-
output[0] = ascii::Char::ReverseSolidus;
16-
output[1] = a;
17-
18-
(output, 0..2)
19-
}
20-
21-
/// Escapes an ASCII character.
22-
///
23-
/// Returns a buffer and the length of the escaped representation.
24-
const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
25-
const { assert!(N >= 4) };
26-
27-
match byte {
28-
b'\t' => backslash(ascii::Char::SmallT),
29-
b'\r' => backslash(ascii::Char::SmallR),
30-
b'\n' => backslash(ascii::Char::SmallN),
31-
b'\\' => backslash(ascii::Char::ReverseSolidus),
32-
b'\'' => backslash(ascii::Char::Apostrophe),
33-
b'\"' => backslash(ascii::Char::QuotationMark),
34-
byte => {
35-
let mut output = [ascii::Char::Null; N];
36-
37-
if let Some(c) = byte.as_ascii()
38-
&& !byte.is_ascii_control()
39-
{
40-
output[0] = c;
41-
(output, 0..1)
42-
} else {
43-
let hi = HEX_DIGITS[(byte >> 4) as usize];
44-
let lo = HEX_DIGITS[(byte & 0xf) as usize];
45-
46-
output[0] = ascii::Char::ReverseSolidus;
47-
output[1] = ascii::Char::SmallX;
48-
output[2] = hi;
49-
output[3] = lo;
50-
51-
(output, 0..4)
52-
}
53-
}
54-
}
55-
}
56-
57-
/// Escapes a character into its `\u{NNNN}` representation.
58-
///
59-
/// Returns a buffer and the length of the escaped representation.
60-
const fn escape_unicode<const N: usize>(c: char) -> ([ascii::Char; N], Range<u8>) {
61-
const { assert!(N >= 10 && N < u8::MAX as usize) };
62-
63-
let c = u32::from(c);
64-
65-
// OR-ing `1` ensures that for `c == 0` the code computes that
66-
// one digit should be printed.
67-
let start = (c | 1).leading_zeros() as usize / 4 - 2;
68-
69-
let mut output = [ascii::Char::Null; N];
70-
output[3] = HEX_DIGITS[((c >> 20) & 15) as usize];
71-
output[4] = HEX_DIGITS[((c >> 16) & 15) as usize];
72-
output[5] = HEX_DIGITS[((c >> 12) & 15) as usize];
73-
output[6] = HEX_DIGITS[((c >> 8) & 15) as usize];
74-
output[7] = HEX_DIGITS[((c >> 4) & 15) as usize];
75-
output[8] = HEX_DIGITS[((c >> 0) & 15) as usize];
76-
output[9] = ascii::Char::RightCurlyBracket;
77-
output[start + 0] = ascii::Char::ReverseSolidus;
78-
output[start + 1] = ascii::Char::SmallU;
79-
output[start + 2] = ascii::Char::LeftCurlyBracket;
80-
81-
(output, (start as u8)..(N as u8))
82-
}
83-
8410
/// An iterator over a fixed-size array.
8511
///
8612
/// This is essentially equivalent to array’s IntoIter except that indexes are
8713
/// limited to u8 to reduce size of the structure.
8814
#[derive(Clone, Debug)]
8915
pub(crate) struct EscapeIterInner<const N: usize> {
90-
// The element type ensures this is always ASCII, and thus also valid UTF-8.
91-
data: [ascii::Char; N],
16+
// Invariant: all elements inside the range indexed by `alive` are initialized
17+
data: [MaybeUninit<ascii::Char>; N],
9218

9319
// Invariant: `alive.start <= alive.end <= N`
9420
alive: Range<u8>,
9521
}
9622

9723
impl<const N: usize> EscapeIterInner<N> {
9824
pub const fn backslash(c: ascii::Char) -> Self {
99-
let (data, range) = backslash(c);
100-
Self { data, alive: range }
25+
const { assert!(N >= 2) };
26+
27+
let mut data = [MaybeUninit::uninit(); N];
28+
29+
data[0] = MaybeUninit::new(ascii::Char::ReverseSolidus);
30+
data[1] = MaybeUninit::new(c);
31+
32+
Self { data, alive: 0..2 }
10133
}
10234

35+
/// Escapes an ASCII character.
10336
pub const fn ascii(c: u8) -> Self {
104-
let (data, range) = escape_ascii(c);
105-
Self { data, alive: range }
37+
const { assert!(N >= 4) };
38+
39+
match c {
40+
b'\t' => Self::backslash(ascii::Char::SmallT),
41+
b'\r' => Self::backslash(ascii::Char::SmallR),
42+
b'\n' => Self::backslash(ascii::Char::SmallN),
43+
b'\\' => Self::backslash(ascii::Char::ReverseSolidus),
44+
b'\'' => Self::backslash(ascii::Char::Apostrophe),
45+
b'\"' => Self::backslash(ascii::Char::QuotationMark),
46+
byte => {
47+
let mut data = [MaybeUninit::uninit(); N];
48+
49+
if let Some(c) = byte.as_ascii()
50+
&& !byte.is_ascii_control()
51+
{
52+
data[0] = MaybeUninit::new(c);
53+
Self { data, alive: 0..1 }
54+
} else {
55+
let hi = HEX_DIGITS[(byte >> 4) as usize];
56+
let lo = HEX_DIGITS[(byte & 0xf) as usize];
57+
58+
data[0] = MaybeUninit::new(ascii::Char::ReverseSolidus);
59+
data[1] = MaybeUninit::new(ascii::Char::SmallX);
60+
data[2] = MaybeUninit::new(hi);
61+
data[3] = MaybeUninit::new(lo);
62+
63+
Self { data, alive: 0..4 }
64+
}
65+
}
66+
}
10667
}
10768

69+
/// Escapes a character into its `\u{NNNN}` representation.
10870
pub const fn unicode(c: char) -> Self {
109-
let (data, range) = escape_unicode(c);
110-
Self { data, alive: range }
71+
const { assert!(N >= 10 && N < u8::MAX as usize) };
72+
73+
let c = c as u32;
74+
75+
// OR-ing `1` ensures that for `c == 0` the code computes that
76+
// one digit should be printed.
77+
let start = (c | 1).leading_zeros() as usize / 4 - 2;
78+
79+
let mut data = [MaybeUninit::uninit(); N];
80+
data[3] = MaybeUninit::new(HEX_DIGITS[((c >> 20) & 15) as usize]);
81+
data[4] = MaybeUninit::new(HEX_DIGITS[((c >> 16) & 15) as usize]);
82+
data[5] = MaybeUninit::new(HEX_DIGITS[((c >> 12) & 15) as usize]);
83+
data[6] = MaybeUninit::new(HEX_DIGITS[((c >> 8) & 15) as usize]);
84+
data[7] = MaybeUninit::new(HEX_DIGITS[((c >> 4) & 15) as usize]);
85+
data[8] = MaybeUninit::new(HEX_DIGITS[((c >> 0) & 15) as usize]);
86+
data[9] = MaybeUninit::new(ascii::Char::RightCurlyBracket);
87+
data[start + 0] = MaybeUninit::new(ascii::Char::ReverseSolidus);
88+
data[start + 1] = MaybeUninit::new(ascii::Char::SmallU);
89+
data[start + 2] = MaybeUninit::new(ascii::Char::LeftCurlyBracket);
90+
91+
Self { data, alive: start as u8..10 }
11192
}
11293

11394
#[inline]
11495
pub const fn empty() -> Self {
115-
Self { data: [ascii::Char::Null; N], alive: 0..0 }
96+
Self { data: [MaybeUninit::uninit(); N], alive: 0..0 }
11697
}
11798

11899
#[inline]
119100
pub fn as_ascii(&self) -> &[ascii::Char] {
120-
// SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`.
101+
// SAFETY: the range indexed by `self.alive` is guaranteed to contain valid data.
121102
unsafe {
122-
self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end))
103+
let data = self.data.get_unchecked(self.alive.start as usize..self.alive.end as usize);
104+
MaybeUninit::slice_assume_init_ref(data)
123105
}
124106
}
125107

@@ -130,27 +112,31 @@ impl<const N: usize> EscapeIterInner<N> {
130112

131113
#[inline]
132114
pub fn len(&self) -> usize {
133-
usize::from(self.alive.end - self.alive.start)
115+
self.alive.len()
134116
}
135117

118+
#[inline]
136119
pub fn next(&mut self) -> Option<u8> {
137120
let i = self.alive.next()?;
138121

139-
// SAFETY: `i` is guaranteed to be a valid index for `self.data`.
140-
unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
122+
// SAFETY: the range indexed by `self.alive` is guaranteed to contain initialized data.
123+
unsafe { Some(MaybeUninit::assume_init_ref(self.data.get_unchecked(i as usize)).to_u8()) }
141124
}
142125

126+
#[inline]
143127
pub fn next_back(&mut self) -> Option<u8> {
144128
let i = self.alive.next_back()?;
145129

146-
// SAFETY: `i` is guaranteed to be a valid index for `self.data`.
147-
unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
130+
// SAFETY: the range indexed by `self.alive` is guaranteed to contain initialized data.
131+
unsafe { Some(MaybeUninit::assume_init_ref(self.data.get_unchecked(i as usize)).to_u8()) }
148132
}
149133

134+
#[inline]
150135
pub fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
151136
self.alive.advance_by(n)
152137
}
153138

139+
#[inline]
154140
pub fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
155141
self.alive.advance_back_by(n)
156142
}

0 commit comments

Comments
 (0)