rust-lang · bors · Mar 17, 2025 · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025
diff --git a/Cargo.lock b/Cargo.lock
@@ -2151,6 +2151,13 @@ version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
 
+[[package]]
+name = "literal-escaper"
+version = "0.0.0"
+dependencies = [
+ "rustc-std-workspace-std 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "lld-wrapper"
 version = "0.1.0"
@@ -3328,6 +3335,12 @@ version = "1.0.1"
 name = "rustc-std-workspace-std"
 version = "1.0.1"
 
+[[package]]
+name = "rustc-std-workspace-std"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aba676a20abe46e5b0f1b0deae474aaaf31407e6c71147159890574599da04ef"
+
 [[package]]
 name = "rustc_abi"
 version = "0.0.0"
@@ -3366,6 +3379,7 @@ name = "rustc_ast"
 version = "0.0.0"
 dependencies = [
  "bitflags",
+ "literal-escaper",
  "memchr",
  "rustc_ast_ir",
  "rustc_data_structures",
@@ -4060,6 +4074,7 @@ name = "rustc_lexer"
 version = "0.0.0"
 dependencies = [
  "expect-test",
+ "literal-escaper",
  "memchr",
  "unicode-properties",
  "unicode-xid",
@@ -4325,6 +4340,7 @@ name = "rustc_parse"
 version = "0.0.0"
 dependencies = [
  "bitflags",
+ "literal-escaper",
  "rustc_ast",
  "rustc_ast_pretty",
  "rustc_data_structures",
@@ -4347,6 +4363,7 @@ dependencies = [
 name = "rustc_parse_format"
 version = "0.0.0"
 dependencies = [
+ "literal-escaper",
  "rustc_index",
  "rustc_lexer",
 ]

diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 # tidy-alphabetical-start
 bitflags = "2.4.1"
+literal-escaper = { path = "../../library/literal-escaper" }
 memchr = "2.7.4"
 rustc_ast_ir = { path = "../rustc_ast_ir" }
 rustc_data_structures = { path = "../rustc_data_structures" }

diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
@@ -2,7 +2,7 @@
 
 use std::{ascii, fmt, str};
 
-use rustc_lexer::unescape::{
+use literal_escaper::{
     MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
 };
 use rustc_span::{Span, Symbol, kw, sym};

diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml
@@ -16,6 +16,7 @@ Rust lexer used by rustc. No stability guarantees are provided.
 [dependencies]
 memchr = "2.7.4"
 unicode-xid = "0.2.0"
+literal-escaper = { path = "../../library/literal-escaper" }
 
 [dependencies.unicode-properties]
 version = "0.1.0"

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
@@ -27,11 +27,13 @@
 // tidy-alphabetical-end
 
 mod cursor;
-pub mod unescape;
 
 #[cfg(test)]
 mod tests;
 
+// FIXME: This re-export is needed for rust-analyzer. Remove it once rust-analyzer uses
+// `literal-escaper` directly.
+pub use literal_escaper as unescape;
 use unicode_properties::UnicodeEmoji;
 pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
 

diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 # tidy-alphabetical-start
 bitflags = "2.4.1"
+literal-escaper = { path = "../../library/literal-escaper" }
 rustc_ast = { path = "../rustc_ast" }
 rustc_ast_pretty = { path = "../rustc_ast_pretty" }
 rustc_data_structures = { path = "../rustc_data_structures" }

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,12 +1,12 @@
 use std::ops::Range;
 
+use literal_escaper::{self, EscapeError, Mode};
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
-use rustc_lexer::unescape::{self, EscapeError, Mode};
 use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
@@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         postfix_len: u32,
     ) -> (token::LitKind, Symbol) {
         self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
-            unescape::unescape_unicode(src, mode, &mut |span, result| {
+            literal_escaper::unescape_unicode(src, mode, &mut |span, result| {
                 callback(span, result.map(drop))
             })
         })
@@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         postfix_len: u32,
     ) -> (token::LitKind, Symbol) {
         self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
-            unescape::unescape_mixed(src, mode, &mut |span, result| {
+            literal_escaper::unescape_mixed(src, mode, &mut |span, result| {
                 callback(span, result.map(drop))
             })
         })

diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -3,8 +3,8 @@
 use std::iter::once;
 use std::ops::Range;
 
+use literal_escaper::{EscapeError, Mode};
 use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
-use rustc_lexer::unescape::{EscapeError, Mode};
 use rustc_span::{BytePos, Span};
 use tracing::debug;
 

diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
@@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow};
 use ast::mut_visit::{self, MutVisitor};
 use ast::token::IdentIsRaw;
 use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
+use literal_escaper::unescape_char;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenTree;
@@ -21,7 +22,6 @@ use rustc_ast::{
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::stack::ensure_sufficient_stack;
 use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic};
-use rustc_lexer::unescape::unescape_char;
 use rustc_macros::Subdiagnostic;
 use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error};
 use rustc_session::lint::BuiltinLintDiag;

diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml
@@ -5,6 +5,7 @@ edition = "2021"
 
 [dependencies]
 # tidy-alphabetical-start
+literal-escaper = { path = "../../library/literal-escaper" }
 rustc_index = { path = "../rustc_index", default-features = false }
 rustc_lexer = { path = "../rustc_lexer" }
 # tidy-alphabetical-end
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs
@@ -19,7 +19,6 @@
 pub use Alignment::*;
 pub use Count::*;
 pub use Position::*;
-use rustc_lexer::unescape;
 
 // Note: copied from rustc_span
 /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@@ -1095,12 +1094,14 @@ fn find_width_map_from_snippet(
 fn unescape_string(string: &str) -> Option<String> {
     let mut buf = String::new();
     let mut ok = true;
-    unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
-        match unescaped_char {
+    literal_escaper::unescape_unicode(
+        string,
+        literal_escaper::Mode::Str,
+        &mut |_, unescaped_char| match unescaped_char {
             Ok(c) => buf.push(c),
             Err(_) => ok = false,
-        }
-    });
+        },
+    );
 
     ok.then_some(buf)
 }

diff --git a/library/Cargo.lock b/library/Cargo.lock
@@ -158,6 +158,13 @@ dependencies = [
  "rustc-std-workspace-core",
 ]
 
+[[package]]
+name = "literal-escaper"
+version = "0.0.0"
+dependencies = [
+ "rustc-std-workspace-std",
+]
+
 [[package]]
 name = "memchr"
 version = "2.7.4"
@@ -220,6 +227,7 @@ name = "proc_macro"
 version = "0.0.0"
 dependencies = [
  "core",
+ "literal-escaper",
  "std",
 ]
 

diff --git a/library/Cargo.toml b/library/Cargo.toml
@@ -7,6 +7,7 @@ members = [
 ]
 
 exclude = [
+  "literal-escaper",
   # stdarch has its own Cargo workspace
   "stdarch",
   "windows_targets"

diff --git a/library/literal-escaper/Cargo.toml b/library/literal-escaper/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "literal-escaper"
+version = "0.0.0"
+edition = "2021"
+
+[dependencies]
+std = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-std' }
+
+[features]
+rustc-dep-of-std = ["dep:std"]
diff --git a/library/literal-escaper/README.md b/library/literal-escaper/README.md
@@ -0,0 +1,4 @@
+# literal-escaper
+
+This crate provides code to unescape Rust string, byte string, C string, char,
+and byte literals. It is used by `rustc_lexer` and `proc_macro`.
diff --git a/compiler/rustc_lexer/src/unescape.rs → library/literal-escaper/src/lib.rs b/compiler/rustc_lexer/src/unescape.rs → library/literal-escaper/src/lib.rs
diff --git a/compiler/rustc_lexer/src/unescape/tests.rs → library/literal-escaper/src/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs → library/literal-escaper/src/tests.rs
diff --git a/library/proc_macro/Cargo.toml b/library/proc_macro/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.0.0"
 edition = "2021"
 
 [dependencies]
+literal-escaper = { path = "../literal-escaper", features = ["rustc-dep-of-std"] }
 std = { path = "../std" }
 # Workaround: when documenting this crate rustdoc will try to load crate named
 # `core` when resolving doc links. Without this line a different `core` will be

diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs
@@ -28,6 +28,7 @@
 #![feature(restricted_std)]
 #![feature(rustc_attrs)]
 #![feature(extend_one)]
+#![feature(stmt_expr_attributes)]
 #![recursion_limit = "256"]
 #![allow(internal_features)]
 #![deny(ffi_unwind_calls)]
@@ -50,11 +51,24 @@ use std::{error, fmt};
 
 #[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
 pub use diagnostic::{Diagnostic, Level, MultiSpan};
+#[unstable(feature = "proc_macro_value", issue = "136652")]
+pub use literal_escaper::EscapeError;
+use literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
 #[unstable(feature = "proc_macro_totokens", issue = "130977")]
 pub use to_tokens::ToTokens;
 
 use crate::escape::{EscapeOptions, escape_bytes};
 
+/// Errors returned when trying to retrieve the unescaped value of a literal.
+#[unstable(feature = "proc_macro_value", issue = "136652")]
+#[derive(Debug, PartialEq, Eq)]
+pub enum ConversionErrorKind {
+    /// The literal failed to be unescaped; see [`EscapeError`] for more information.
+    FailedToUnescape(EscapeError),
+    /// The literal is not of the kind expected by the conversion method that was called.
+    InvalidLiteralKind,
+}
+
 /// Determines whether proc_macro has been made accessible to the currently
 /// running program.
 ///
@@ -1450,6 +1464,107 @@ impl Literal {
             }
         })
     }
+
+    /// Returns the unescaped value of this literal if it is a string or raw string literal.
+    #[unstable(feature = "proc_macro_value", issue = "136652")]
+    pub fn str_value(&self) -> Result<String, ConversionErrorKind> {
+        self.0.symbol.with(|symbol| match self.0.kind {
+            bridge::LitKind::Str => {
+                if symbol.contains('\\') {
+                    let mut buf = String::with_capacity(symbol.len());
+                    let mut error = None;
+                    // Force-inlining here is aggressive but the closure is
+                    // called on every char in the string, so it can be hot in
+                    // programs with many long strings containing escapes.
+                    unescape_unicode(
+                        symbol,
+                        Mode::Str,
+                        &mut #[inline(always)]
+                        |_, c| match c {
+                            Ok(c) => buf.push(c),
+                            Err(err) => {
+                                if err.is_fatal() {
+                                    error = Some(ConversionErrorKind::FailedToUnescape(err));
+                                }
+                            }
+                        },
+                    );
+                    if let Some(error) = error { Err(error) } else { Ok(buf) }
+                } else {
+                    Ok(symbol.to_string())
+                }
+            }
+            bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()),
+            _ => Err(ConversionErrorKind::InvalidLiteralKind),
+        })
+    }
+
+    /// Returns the unescaped value, with a terminating NUL byte appended, if the current literal
+    /// is a C string or raw C string literal.
+    #[unstable(feature = "proc_macro_value", issue = "136652")]
+    pub fn cstr_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
+        self.0.symbol.with(|symbol| match self.0.kind {
+            bridge::LitKind::CStr => {
+                let mut error = None;
+                let mut buf = Vec::with_capacity(symbol.len());
+
+                unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
+                    Ok(MixedUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Ok(MixedUnit::HighByte(b)) => buf.push(b),
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Some(ConversionErrorKind::FailedToUnescape(err));
+                        }
+                    }
+                });
+                if let Some(error) = error {
+                    Err(error)
+                } else {
+                    buf.push(0);
+                    Ok(buf)
+                }
+            }
+            bridge::LitKind::CStrRaw(_) => {
+                // Raw C strings have no escapes, so the symbol's bytes can be
+                // used as-is; only the terminating NUL byte needs to be
+                // appended.
+                let mut buf = symbol.to_owned().into_bytes();
+                buf.push(0);
+                Ok(buf)
+            }
+            _ => Err(ConversionErrorKind::InvalidLiteralKind),
+        })
+    }
+
+    /// Returns the unescaped bytes if the current literal is a byte string or a raw byte string
+    /// literal.
+    #[unstable(feature = "proc_macro_value", issue = "136652")]
+    pub fn byte_str_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
+        self.0.symbol.with(|symbol| match self.0.kind {
+            bridge::LitKind::ByteStr => {
+                let mut buf = Vec::with_capacity(symbol.len());
+                let mut error = None;
+
+                unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
+                    Ok(c) => buf.push(byte_from_char(c)),
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Some(ConversionErrorKind::FailedToUnescape(err));
+                        }
+                    }
+                });
+                if let Some(error) = error { Err(error) } else { Ok(buf) }
+            }
+            bridge::LitKind::ByteStrRaw(_) => {
+                // Raw byte strings have no escapes, so the symbol's bytes can
+                // be returned as-is.
+                Ok(symbol.to_owned().into_bytes())
+            }
+            _ => Err(ConversionErrorKind::InvalidLiteralKind),
+        })
+    }
 }
 
 /// Parse a single literal from its stringified representation.