Skip to content

Commit

Permalink
Auto merge of rust-lang#136538 - hkBst:cleanup_lexer_unescape, r=<try>
Browse files Browse the repository at this point in the history
Restructure rustc_lexer::unescape

Separate the functions for unescaping each kind of string and unit:
 - this duplicates some code, but also gets rid of code that is only there for genericity
 - each function is now simpler by inlining booleans, which might lead to faster code

Use a Peekable<CharIndices<'_>> instead of going back and forth between a string slice and a chars iterator.
 - this gets rid of most position computations
 - this allows removal of the double traversal for correct backslash-newline escapes in skip_ascii_whitespace

Improve documentation
  • Loading branch information
bors committed Feb 10, 2025
2 parents c03c38d + e00522b commit 75f0e3d
Show file tree
Hide file tree
Showing 7 changed files with 443 additions and 371 deletions.
13 changes: 6 additions & 7 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use std::{ascii, fmt, str};

use rustc_lexer::unescape::{
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
MixedUnit, unescape_byte, unescape_byte_str, unescape_char, unescape_cstr, unescape_str,
};
use rustc_span::{Span, Symbol, kw, sym};
use tracing::debug;
Expand Down Expand Up @@ -87,11 +87,10 @@ impl LitKind {
// Force-inlining here is aggressive but the closure is
// called on every char in the string, so it can be hot in
// programs with many long strings containing escapes.
unescape_unicode(
unescape_str(
s,
Mode::Str,
&mut #[inline(always)]
|_, c| match c {
|_, res| match res {
Ok(c) => buf.push(c),
Err(err) => {
assert!(!err.is_fatal(), "failed to unescape string literal")
Expand All @@ -111,8 +110,8 @@ impl LitKind {
token::ByteStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
unescape_byte_str(s, &mut |_, res| match res {
Ok(b) => buf.push(b),
Err(err) => {
assert!(!err.is_fatal(), "failed to unescape string literal")
}
Expand All @@ -128,7 +127,7 @@ impl LitKind {
token::CStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
unescape_cstr(s, &mut |_span, c| match c {
Ok(MixedUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Expand Down
Loading

0 comments on commit 75f0e3d

Please sign in to comment.