Skip to content

Commit bb047a9

Browse files
committed
refactor(codegen): use SliceIterExt in string printer (#12296)
Use the `SliceIterExt` trait in the string printer. This allows removing a bunch of const generics. The dynamic implementations in `SliceIterExt` are just as efficient (possibly more so).
1 parent 9e1acc2 commit bb047a9

File tree

2 files changed

+26
-37
lines changed

2 files changed

+26
-37
lines changed

crates/oxc_codegen/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ doctest = true
2222
[dependencies]
2323
oxc_allocator = { workspace = true }
2424
oxc_ast = { workspace = true }
25-
oxc_data_structures = { workspace = true, features = ["code_buffer", "pointer_ext", "stack"] }
25+
oxc_data_structures = { workspace = true, features = ["code_buffer", "pointer_ext", "slice_iter_ext", "stack"] }
2626
oxc_index = { workspace = true }
2727
oxc_semantic = { workspace = true }
2828
oxc_sourcemap = { workspace = true }

crates/oxc_codegen/src/str.rs

Lines changed: 25 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use std::slice;
22

33
use oxc_ast::ast::StringLiteral;
4-
use oxc_data_structures::{assert_unchecked, pointer_ext::PointerExt};
4+
use oxc_data_structures::{
5+
assert_unchecked, pointer_ext::PointerExt, slice_iter_ext::SliceIterExt,
6+
};
57
use oxc_syntax::identifier::{LS, NBSP, PS};
68

79
use crate::Codegen;
@@ -118,34 +120,22 @@ impl PrintStringState<'_> {
118120
/// before calling other methods e.g. `flush`.
119121
#[inline]
120122
unsafe fn consume_byte_unchecked(&mut self) {
121-
// `assert_unchecked!` produces less instructions than `self.bytes.next().unwrap_unchecked()`
122-
// https://godbolt.org/z/TWzfK1eKj
123-
124123
// SAFETY: Caller guarantees there is a byte to consume in `bytes` iterator,
125-
// and that consuming it leaves the iterator on a UTF-8 char boundary.
126-
unsafe { assert_unchecked!(!self.bytes.as_slice().is_empty()) };
127-
self.bytes.next().unwrap();
124+
// and that consuming it leaves the iterator on a UTF-8 char boundary.
125+
unsafe { self.bytes.next_unchecked() };
128126
}
129127

130-
/// Advance the `bytes` iterator by `N` bytes.
128+
/// Advance the `bytes` iterator by `count` bytes.
131129
///
132130
/// # SAFETY
133131
///
134-
/// * There must be at least `N` more bytes in the `bytes` iterator.
132+
/// * There must be at least `count` more bytes in the `bytes` iterator.
135133
/// * After this call, `bytes` iterator must be left on a UTF-8 character boundary.
136134
#[inline]
137-
unsafe fn consume_bytes_unchecked<const N: usize>(&mut self) {
138-
// `assert_unchecked!` produces many less instructions than
139-
// `for _i in 0..N { self.bytes.next().unwrap_unchecked(); }`.
140-
// The `unwrap` in loop below is required for compact assembly.
141-
// https://godbolt.org/z/TWzfK1eKj
142-
143-
// SAFETY: Caller guarantees there are `N` bytes to consume in `bytes` iterator,
135+
unsafe fn consume_bytes_unchecked(&mut self, count: usize) {
136+
// SAFETY: Caller guarantees there are `count` bytes to consume in `bytes` iterator,
144137
// and that consuming them leaves the iterator on a UTF-8 char boundary.
145-
unsafe { assert_unchecked!(self.bytes.as_slice().len() >= N) };
146-
for _i in 0..N {
147-
self.bytes.next().unwrap();
148-
}
138+
unsafe { self.bytes.advance_unchecked(count) };
149139
}
150140

151141
/// Set the start of next chunk to be current position of `bytes` iterator.
@@ -164,23 +154,22 @@ impl PrintStringState<'_> {
164154
#[inline]
165155
unsafe fn flush_and_consume_byte(&mut self, codegen: &mut Codegen) {
166156
// SAFETY: Caller guarantees `flush_and_consume_bytes`'s requirements are met
167-
unsafe { self.flush_and_consume_bytes::<1>(codegen) };
157+
unsafe { self.flush_and_consume_bytes(codegen, 1) };
168158
}
169159

170-
/// Flush current chunk to buffer, consume `N` bytes, and start next chunk after those bytes.
160+
/// Flush current chunk to buffer, consume `count` bytes, and start next chunk after those bytes.
171161
///
172162
/// # SAFETY
173163
///
174-
/// * There must be at least `N` more bytes in the `bytes` iterator.
164+
/// * There must be at least `count` more bytes in the `bytes` iterator.
175165
/// * After this call, `bytes` iterator must be left on a UTF-8 character boundary.
176166
#[inline]
177-
unsafe fn flush_and_consume_bytes<const N: usize>(&mut self, codegen: &mut Codegen) {
167+
unsafe fn flush_and_consume_bytes(&mut self, codegen: &mut Codegen, count: usize) {
178168
self.flush(codegen);
179169

180-
debug_assert!(self.bytes.as_slice().len() >= N);
181-
// SAFETY: Caller guarantees there are `N` bytes to consume in `bytes` iterator,
170+
// SAFETY: Caller guarantees there are `count` bytes to consume in `bytes` iterator,
182171
// and that consuming them leaves the iterator on a UTF-8 char boundary
183-
unsafe { self.consume_bytes_unchecked::<N>() };
172+
unsafe { self.consume_bytes_unchecked(count) };
184173

185174
self.start_chunk();
186175
}
@@ -601,7 +590,7 @@ unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
601590
// SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already.
602591
// `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
603592
// positioned on UTF-8 char boundary.
604-
unsafe { state.consume_bytes_unchecked::<6>() };
593+
unsafe { state.consume_bytes_unchecked(6) };
605594
}
606595
}
607596

@@ -622,13 +611,13 @@ unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
622611
_ => {
623612
// Some other character starting with 0xE2. Advance past it.
624613
// SAFETY: 0xE2 is always the start of a 3-byte Unicode character
625-
unsafe { state.consume_bytes_unchecked::<3>() };
614+
unsafe { state.consume_bytes_unchecked(3) };
626615
return;
627616
}
628617
};
629618

630619
// SAFETY: 0xE2 is always the start of a 3-byte Unicode character
631-
unsafe { state.flush_and_consume_bytes::<3>(codegen) };
620+
unsafe { state.flush_and_consume_bytes(codegen, 3) };
632621
codegen.print_str(replacement);
633622
}
634623

@@ -642,12 +631,12 @@ unsafe fn print_non_breaking_space(codegen: &mut Codegen, state: &mut PrintStrin
642631
if next == NBSP_LAST_BYTE {
643632
// Character is NBSP.
644633
// SAFETY: 0xC2 is always the start of a 2-byte Unicode character.
645-
unsafe { state.flush_and_consume_bytes::<2>(codegen) };
634+
unsafe { state.flush_and_consume_bytes(codegen, 2) };
646635
codegen.print_str("\\xA0");
647636
} else {
648637
// Some other character starting with 0xC2. Advance past it.
649638
// SAFETY: 0xC2 is always the start of a 2-byte Unicode character.
650-
unsafe { state.consume_bytes_unchecked::<2>() };
639+
unsafe { state.consume_bytes_unchecked(2) };
651640
}
652641
}
653642

@@ -675,12 +664,12 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
675664
// Actual lossy replacement character.
676665
// Flush up to and including the lossy replacement character, then skip the 4 hex bytes.
677666
// SAFETY: 0xEF is always the start of a 3-byte Unicode character
678-
unsafe { state.consume_bytes_unchecked::<3>() };
667+
unsafe { state.consume_bytes_unchecked(3) };
679668
state.flush(codegen);
680669
// SAFETY: 0xEF is always the start of a 3-byte Unicode character.
681670
// `bytes.as_slice()[3..7]` would have panicked if there weren't 4 more bytes after it.
682671
// All those bytes are ASCII, so this leaves `bytes` on a UTF-8 char boundary.
683-
unsafe { state.consume_bytes_unchecked::<4>() };
672+
unsafe { state.consume_bytes_unchecked(4) };
684673
// Start next chunk after the 4 hex bytes
685674
state.start_chunk();
686675
return;
@@ -695,7 +684,7 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
695684
// SAFETY: `bytes.as_slice()[3..7]` would have panicked if there weren't at least 7 bytes
696685
// remaining. First 3 bytes are lossy replacement character, and we just checked that
697686
// next 4 bytes are ASCII, so this leaves `bytes` on a UTF-8 char boundary.
698-
unsafe { state.consume_bytes_unchecked::<7>() };
687+
unsafe { state.consume_bytes_unchecked(7) };
699688

700689
// Start next chunk after the 4 hex bytes
701690
state.start_chunk();
@@ -711,7 +700,7 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
711700
// `lone_surrogates` is `false` or character is some other character starting with 0xEF.
712701
// Advance past the character.
713702
// SAFETY: 0xEF is always the start of a 3-byte Unicode character
714-
unsafe { state.consume_bytes_unchecked::<3>() };
703+
unsafe { state.consume_bytes_unchecked(3) };
715704
}
716705

717706
/// Call a closure while hinting to compiler that this branch is rarely taken.

0 commit comments

Comments
 (0)