Skip to content

Experiment: Reserve guarded string literal syntax (RFC 3593) on all editions #124605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::str::Chars;
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
#[derive(Clone)]
pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
Expand Down
92 changes: 84 additions & 8 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ pub mod unescape;
#[cfg(test)]
mod tests;

use std::num::NonZeroU8;

pub use crate::cursor::Cursor;

use self::LiteralKind::*;
Expand Down Expand Up @@ -179,24 +181,27 @@ pub enum DocStyle {
/// `rustc_ast::ast::LitKind`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
/// "12_u8", "0o100", "0b120i99", "1f32".
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
Int { base: Base, empty_int: bool },
/// "12.34f32", "1e3", but not "1f32".
/// `12.34f32`, `1e3`, but not `1f32`.
Float { base: Base, empty_exponent: bool },
/// "'a'", "'\\'", "'''", "';"
/// `'a'`, `'\\'`, `'''`, `';`
Char { terminated: bool },
/// "b'a'", "b'\\'", "b'''", "b';"
/// `b'a'`, `b'\\'`, `b'''`, `b';`
Byte { terminated: bool },
/// ""abc"", ""abc"
/// `"abc"`, `"abc`
Str { terminated: bool },
/// "b"abc"", "b"abc"
/// `b"abc"`, `b"abc`
ByteStr { terminated: bool },
/// `c"abc"`, `c"abc`
CStr { terminated: bool },
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
/// `#"abc"#`, `#"a`, `##"a"#`. `None` indicates no closing quote.
/// Allows fewer hashes to close the string to support older editions.
GuardedStr { n_start_hashes: Option<NonZeroU8>, n_end_hashes: u8 },
/// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
/// an invalid literal.
RawStr { n_hashes: Option<u8> },
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
/// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
/// indicates an invalid literal.
RawByteStr { n_hashes: Option<u8> },
/// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
Expand Down Expand Up @@ -365,6 +370,49 @@ impl Cursor<'_> {
_ => self.ident_or_unknown_prefix(),
},

// Guarded string literal (reserved syntax).
'#' if matches!(self.first(), '"' | '#') => {
// Create a backup to restore later if this
// turns out to not be a guarded literal.
let backup = self.clone();

let mut n_start_hashes: u32 = 1; // Already captured one `#`.
while self.first() == '#' {
n_start_hashes += 1;
self.bump();
}

if self.first() == '"' {
self.bump();

let res = self.guarded_double_quoted_string(n_start_hashes);
let suffix_start = self.pos_within_token();

if let (Ok(n_end_hashes), Ok(n)) = (res, u8::try_from(n_start_hashes)) {
self.eat_literal_suffix();

Literal {
kind: GuardedStr {
n_start_hashes: NonZeroU8::new(n),
// Always succeeds because `n_end_hashes <= n`
n_end_hashes: n_end_hashes.try_into().unwrap(),
},
suffix_start,
}
} else {
Literal {
kind: GuardedStr { n_start_hashes: None, n_end_hashes: 0 },
suffix_start,
}
}
} else {
// Not a guarded string, so restore old state.
*self = backup;
// Return a pound token.
Pound
}
}

// Byte literal, byte string literal, raw byte string literal or identifier.
'b' => self.c_or_byte_string(
|terminated| ByteStr { terminated },
Expand Down Expand Up @@ -758,6 +806,34 @@ impl Cursor<'_> {
false
}

/// Eats the remainder of a guarded string literal: the string body (the opening
/// `"` has already been consumed) followed by up to `n_start_hashes` closing `#`s.
///
/// Returns the number of closing hashes eaten, which may be smaller than
/// `n_start_hashes`, or `RawStrError::NoTerminator` when `double_quoted_string`
/// reports failure.
fn guarded_double_quoted_string(&mut self, n_start_hashes: u32) -> Result<u32, RawStrError> {
    debug_assert!(self.prev() == '"');

    // The body is lexed exactly like a normal string literal, so that it can
    // be recovered as one for older editions later.
    let body_terminated = self.double_quoted_string();
    if !body_terminated {
        return Err(RawStrError::NoTerminator {
            expected: n_start_hashes,
            found: 0,
            possible_terminator_offset: None,
        });
    }

    // Eat closing `#`s, but never more than were opened. Extra trailing hashes
    // are left alone: `###"abcde"####` yields a guarded string with three
    // closing hashes, followed by a separate `#` token.
    let mut closing_hashes = 0;
    while closing_hashes < n_start_hashes && self.first() == '#' {
        self.bump();
        closing_hashes += 1;
    }

    // A count lower than `n_start_hashes` is not an error here; it is handled later.
    Ok(closing_hashes)
}

/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
// Wrap the actual function to handle the error with too many hashes.
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
.label = the label
.suggestion = add `:` after the label

parse_reserved_guarded_string = invalid string literal
.note = unprefixed guarded string literals are reserved for future use
.suggestion_whitespace = consider inserting whitespace here

parse_return_types_use_thin_arrow = return types are denoted using `->`
.suggestion = use `->` instead

Expand Down
18 changes: 18 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,24 @@ pub enum UnknownPrefixSugg {
},
}

// Diagnostic for the reserved guarded string literal syntax.
// Uses the `parse_reserved_guarded_string` message together with its `.note`.
#[derive(Diagnostic)]
#[diag(parse_reserved_guarded_string)]
#[note]
pub struct ReservedGuardedString {
// Span the diagnostic is reported on.
#[primary_span]
pub span: Span,
// Optional suggestion attached as a subdiagnostic; `None` emits the
// diagnostic without a suggestion.
#[subdiagnostic]
pub sugg: Option<GuardedStringSugg>,
}
// Suggestion subdiagnostic that proposes inserting a single space (`code = " "`)
// at the wrapped span. It is marked `maybe-incorrect` and rendered in the
// verbose style.
#[derive(Subdiagnostic)]
#[suggestion(
parse_suggestion_whitespace,
code = " ",
applicability = "maybe-incorrect",
style = "verbose"
)]
pub struct GuardedStringSugg(#[primary_span] pub Span);

#[derive(Diagnostic)]
#[diag(parse_too_many_hashes)]
pub struct TooManyHashes {
Expand Down
24 changes: 24 additions & 0 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,30 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
self.report_raw_str_error(start, 1);
}
}
// RFC 3593 reserved this syntax for future use.
rustc_lexer::LiteralKind::GuardedStr { n_start_hashes, n_end_hashes } => {
let span = self.mk_sp(start, self.pos);

if let Some(n_start_hashes) = n_start_hashes {
let n = u32::from(n_start_hashes.get());
let e = u32::from(n_end_hashes);
let expn_data = span.ctxt().outer_expn_data();

let space_pos = start + BytePos(n);
let space_span = self.mk_sp(space_pos, space_pos);

let sugg = if expn_data.is_root() {
Some(errors::GuardedStringSugg(space_span))
} else {
None
};

self.dcx().emit_err(errors::ReservedGuardedString { span, sugg });
self.cook_unicode(token::Str, Mode::Str, start, end, 1 + n, 1 + e) // ##" "##
} else {
self.dcx().emit_fatal(errors::ReservedGuardedString { span, sugg: None });
}
}
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
Expand Down
3 changes: 2 additions & 1 deletion src/librustdoc/html/highlight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,8 @@ impl<'src> Classifier<'src> {
| LiteralKind::RawStr { .. }
| LiteralKind::RawByteStr { .. }
| LiteralKind::CStr { .. }
| LiteralKind::RawCStr { .. } => Class::String,
| LiteralKind::RawCStr { .. }
| LiteralKind::GuardedStr { .. } => Class::String,
// Number literals.
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
},
Expand Down
4 changes: 4 additions & 0 deletions src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,10 @@ impl<'a> Converter<'a> {
}
C_STRING
}
rustc_lexer::LiteralKind::GuardedStr { .. } => {
err = "Invalid string literal";
STRING
}
};

let err = if err.is_empty() { None } else { Some(err) };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ impl server::FreeFunctions for RaSpanServer {
3 + n_hashes.unwrap_or_default() as usize,
1 + n_hashes.unwrap_or_default() as usize,
),
LiteralKind::GuardedStr { .. } => return Err(()),
};

let (lit, suffix) = s.split_at(suffix_start as usize);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ impl server::FreeFunctions for TokenIdServer {
3 + n_hashes.unwrap_or_default() as usize,
1 + n_hashes.unwrap_or_default() as usize,
),
LiteralKind::GuardedStr { .. } => return Err(()),
};

let (lit, suffix) = s.split_at(suffix_start as usize);
Expand Down
60 changes: 60 additions & 0 deletions tests/ui/lexer/reserved-guarded-strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//@ compile-flags: -Zunstable-options
//@ edition:2024

macro_rules! demo1 { // Each `demoN!` only matches an input of exactly N token trees.
( $a:tt ) => { println!("one tokens") };
}

macro_rules! demo2 { // Matches exactly two token trees.
( $a:tt $b:tt ) => { println!("two tokens") };
}

macro_rules! demo3 { // Matches exactly three token trees.
( $a:tt $b:tt $c:tt ) => { println!("three tokens") };
}

macro_rules! demo4 { // Matches exactly four token trees.
( $a:tt $b:tt $c:tt $d:tt ) => { println!("four tokens") };
}

macro_rules! demo5 { // Matches exactly five token trees.
( $a:tt $b:tt $c:tt $d:tt $e:tt ) => { println!("five tokens") };
}

macro_rules! demo6 { // Matches exactly six token trees.
( $a:tt $b:tt $c:tt $d:tt $e:tt $f:tt ) => { println!("six tokens") };
}

macro_rules! demo7 { // Matches exactly seven token trees.
( $a:tt $b:tt $c:tt $d:tt $e:tt $f:tt $g:tt ) => { println!("seven tokens") };
}

fn main() { // Each call only matches if its argument lexes to the number of tokens in the macro name.
demo1!("");
demo2!(# ""); // Whitespace between `#` and the quote: the `#` is its own token.
demo3!(# ""#);
demo2!(# "foo");
demo3!(## "foo");
demo3!(# "foo"#);
demo4!(### "foo");
demo4!(## "foo"#);
demo7!(### "foo"###);
// Hashes that only follow a plain string literal are separate `#` tokens.
demo2!("foo"#);
demo4!("foo"###);
// Hashes right after an unknown prefix: both errors below are expected.
demo2!(blah"xx"); //~ ERROR prefix `blah` is unknown
demo2!(blah#"xx"#);
//~^ ERROR prefix `blah` is unknown
//~| ERROR invalid string literal
// Hashes directly followed by `"` form a single token that is rejected, even with fewer closing hashes.
demo1!(#""); //~ ERROR invalid string literal
demo1!(#""#); //~ ERROR invalid string literal
demo1!(####""); //~ ERROR invalid string literal
demo1!(#"foo"); //~ ERROR invalid string literal
demo1!(###"foo"); //~ ERROR invalid string literal
demo1!(#"foo"#); //~ ERROR invalid string literal
demo1!(###"foo"#); //~ ERROR invalid string literal
demo1!(###"foo"##); //~ ERROR invalid string literal
demo1!(###"foo"###); //~ ERROR invalid string literal
}
Loading
Loading