11use std:: slice;
22
33use oxc_ast:: ast:: StringLiteral ;
4- use oxc_data_structures:: { assert_unchecked, pointer_ext:: PointerExt } ;
4+ use oxc_data_structures:: {
5+ assert_unchecked, pointer_ext:: PointerExt , slice_iter_ext:: SliceIterExt ,
6+ } ;
57use oxc_syntax:: identifier:: { LS , NBSP , PS } ;
68
79use crate :: Codegen ;
@@ -118,34 +120,22 @@ impl PrintStringState<'_> {
118120 /// before calling other methods e.g. `flush`.
119121 #[ inline]
120122 unsafe fn consume_byte_unchecked ( & mut self ) {
121- // `assert_unchecked!` produces less instructions than `self.bytes.next().unwrap_unchecked()`
122- // https://godbolt.org/z/TWzfK1eKj
123-
124123 // SAFETY: Caller guarantees there is a byte to consume in `bytes` iterator,
125- // and that consuming it leaves the iterator on a UTF-8 char boundary.
126- unsafe { assert_unchecked ! ( !self . bytes. as_slice( ) . is_empty( ) ) } ;
127- self . bytes . next ( ) . unwrap ( ) ;
124+ // and that consuming it leaves the iterator on a UTF-8 char boundary.
125+ unsafe { self . bytes . next_unchecked ( ) } ;
128126 }
129127
130- /// Advance the `bytes` iterator by `N` bytes.
128+ /// Advance the `bytes` iterator by `count` bytes.
131129 ///
132130 /// # SAFETY
133131 ///
134- /// * There must be at least `N` more bytes in the `bytes` iterator.
132+ /// * There must be at least `count` more bytes in the `bytes` iterator.
135133 /// * After this call, `bytes` iterator must be left on a UTF-8 character boundary.
136134 #[ inline]
137- unsafe fn consume_bytes_unchecked < const N : usize > ( & mut self ) {
138- // `assert_unchecked!` produces many less instructions than
139- // `for _i in 0..N { self.bytes.next().unwrap_unchecked(); }`.
140- // The `unwrap` in loop below is required for compact assembly.
141- // https://godbolt.org/z/TWzfK1eKj
142-
143- // SAFETY: Caller guarantees there are `N` bytes to consume in `bytes` iterator,
135+ unsafe fn consume_bytes_unchecked ( & mut self , count : usize ) {
136+ // SAFETY: Caller guarantees there are `count` bytes to consume in `bytes` iterator,
144137 // and that consuming them leaves the iterator on a UTF-8 char boundary.
145- unsafe { assert_unchecked ! ( self . bytes. as_slice( ) . len( ) >= N ) } ;
146- for _i in 0 ..N {
147- self . bytes . next ( ) . unwrap ( ) ;
148- }
138+ unsafe { self . bytes . advance_unchecked ( count) } ;
149139 }
150140
151141 /// Set the start of next chunk to be current position of `bytes` iterator.
@@ -164,23 +154,22 @@ impl PrintStringState<'_> {
164154 #[ inline]
165155 unsafe fn flush_and_consume_byte ( & mut self , codegen : & mut Codegen ) {
166156 // SAFETY: Caller guarantees `flush_and_consume_bytes`'s requirements are met for a single byte
167- unsafe { self . flush_and_consume_bytes :: < 1 > ( codegen) } ;
157+ unsafe { self . flush_and_consume_bytes ( codegen, 1 ) } ;
168158 }
169159
170- /// Flush current chunk to buffer, consume `N` bytes, and start next chunk after those bytes.
160+ /// Flush current chunk to buffer, consume `count` bytes, and start next chunk after those bytes.
171161 ///
172162 /// # SAFETY
173163 ///
174- /// * There must be at least `N` more bytes in the `bytes` iterator.
164+ /// * There must be at least `count` more bytes in the `bytes` iterator.
175165 /// * After this call, `bytes` iterator must be left on a UTF-8 character boundary.
176166 #[ inline]
177- unsafe fn flush_and_consume_bytes < const N : usize > ( & mut self , codegen : & mut Codegen ) {
167+ unsafe fn flush_and_consume_bytes ( & mut self , codegen : & mut Codegen , count : usize ) {
178168 self . flush ( codegen) ;
179169
180- debug_assert ! ( self . bytes. as_slice( ) . len( ) >= N ) ;
181- // SAFETY: Caller guarantees there are `N` bytes to consume in `bytes` iterator,
170+ // SAFETY: Caller guarantees there are `count` bytes to consume in `bytes` iterator,
182171 // and that consuming them leaves the iterator on a UTF-8 char boundary.
183- unsafe { self . consume_bytes_unchecked :: < N > ( ) } ;
172+ unsafe { self . consume_bytes_unchecked ( count ) } ;
184173
185174 self . start_chunk ( ) ;
186175 }
@@ -601,7 +590,7 @@ unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
601590 // SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already.
602591 // `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
603592 // positioned on UTF-8 char boundary.
604- unsafe { state. consume_bytes_unchecked :: < 6 > ( ) } ;
593+ unsafe { state. consume_bytes_unchecked ( 6 ) } ;
605594 }
606595}
607596
@@ -622,13 +611,13 @@ unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
622611 _ => {
623612 // Some other character starting with 0xE2. Advance past it.
624613 // SAFETY: 0xE2 is always the start of a 3-byte Unicode character
625- unsafe { state. consume_bytes_unchecked :: < 3 > ( ) } ;
614+ unsafe { state. consume_bytes_unchecked ( 3 ) } ;
626615 return ;
627616 }
628617 } ;
629618
630619 // SAFETY: 0xE2 is always the start of a 3-byte Unicode character
631- unsafe { state. flush_and_consume_bytes :: < 3 > ( codegen) } ;
620+ unsafe { state. flush_and_consume_bytes ( codegen, 3 ) } ;
632621 codegen. print_str ( replacement) ;
633622}
634623
@@ -642,12 +631,12 @@ unsafe fn print_non_breaking_space(codegen: &mut Codegen, state: &mut PrintStrin
642631 if next == NBSP_LAST_BYTE {
643632 // Character is NBSP.
644633 // SAFETY: 0xC2 is always the start of a 2-byte Unicode character.
645- unsafe { state. flush_and_consume_bytes :: < 2 > ( codegen) } ;
634+ unsafe { state. flush_and_consume_bytes ( codegen, 2 ) } ;
646635 codegen. print_str ( "\\ xA0" ) ;
647636 } else {
648637 // Some other character starting with 0xC2. Advance past it.
649638 // SAFETY: 0xC2 is always the start of a 2-byte Unicode character.
650- unsafe { state. consume_bytes_unchecked :: < 2 > ( ) } ;
639+ unsafe { state. consume_bytes_unchecked ( 2 ) } ;
651640 }
652641}
653642
@@ -675,12 +664,12 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
675664 // Actual lossy replacement character.
676665 // Flush up to and including the lossy replacement character, then skip the 4 hex bytes.
677666 // SAFETY: 0xEF is always the start of a 3-byte Unicode character
678- unsafe { state. consume_bytes_unchecked :: < 3 > ( ) } ;
667+ unsafe { state. consume_bytes_unchecked ( 3 ) } ;
679668 state. flush ( codegen) ;
680669 // SAFETY: 0xEF is always the start of a 3-byte Unicode character.
681670 // `bytes.as_slice()[3..7]` would have panicked if there weren't 4 more bytes after it.
682671 // All those bytes are ASCII, so this leaves `bytes` on a UTF-8 char boundary.
683- unsafe { state. consume_bytes_unchecked :: < 4 > ( ) } ;
672+ unsafe { state. consume_bytes_unchecked ( 4 ) } ;
684673 // Start next chunk after the 4 hex bytes
685674 state. start_chunk ( ) ;
686675 return ;
@@ -695,7 +684,7 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
695684 // SAFETY: `bytes.as_slice()[3..7]` would have panicked if there weren't at least 7 bytes
696685 // remaining. First 3 bytes are lossy replacement character, and we just checked that
697686 // next 4 bytes are ASCII, so this leaves `bytes` on a UTF-8 char boundary.
698- unsafe { state. consume_bytes_unchecked :: < 7 > ( ) } ;
687+ unsafe { state. consume_bytes_unchecked ( 7 ) } ;
699688
700689 // Start next chunk after the 4 hex bytes
701690 state. start_chunk ( ) ;
@@ -711,7 +700,7 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
711700 // `lone_surrogates` is `false` or character is some other character starting with 0xEF.
712701 // Advance past the character.
713702 // SAFETY: 0xEF is always the start of a 3-byte Unicode character
714- unsafe { state. consume_bytes_unchecked :: < 3 > ( ) } ;
703+ unsafe { state. consume_bytes_unchecked ( 3 ) } ;
715704}
716705
717706/// Call a closure while hinting to the compiler that this branch is rarely taken.
0 commit comments