Skip to content

Commit

Permalink
Auto merge of #113476 - fee1-dead-contrib:c-str-lit, r=petrochenkov
Browse files Browse the repository at this point in the history
Reimplement C-str literals

This reverts #113334, cc `@fmease`.

While converting lexer tokens to AST tokens in `rustc_parse`, we check the edition of the token's span. If the edition is older than 2021, we split the token into two: one being the identifier (the `c` or `cr` prefix) and the other being the string literal.
  • Loading branch information
bors committed Jul 25, 2023
2 parents ff8fe76 + a0376e9 commit 23405bb
Show file tree
Hide file tree
Showing 12 changed files with 85 additions and 97 deletions.
4 changes: 4 additions & 0 deletions compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ impl<'a> Cursor<'a> {
}
}

/// Returns the not-yet-consumed remainder of the input as a string slice.
///
/// The returned slice carries the input lifetime `'a` rather than the
/// lifetime of the `&self` borrow, so callers may keep it around while
/// continuing to advance (or even replacing) the cursor.
pub fn as_str(&self) -> &'a str {
self.chars.as_str()
}

/// Returns the last eaten symbol (or `'\0'` in release builds).
/// (For debug assertions only.)
pub(crate) fn prev(&self) -> char {
Expand Down
7 changes: 7 additions & 0 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,13 @@ impl Cursor<'_> {
Some(|terminated| Byte { terminated }),
),

// c-string literal, raw c-string literal or identifier.
'c' => self.c_or_byte_string(
|terminated| CStr { terminated },
|n_hashes| RawCStr { n_hashes },
None,
),

// Identifier (this should be checked after other variants that can
// start as an identifier).
c if is_id_start(c) => self.ident_or_unknown_prefix(),
Expand Down
41 changes: 32 additions & 9 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_lexer::{Cursor, LiteralKind};
use rustc_session::lint::builtin::{
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
};
Expand Down Expand Up @@ -118,6 +118,7 @@ impl<'a> StringReader<'a> {
let mut swallow_next_invalid = 0;
// Skip trivial (whitespace & comments) tokens
loop {
let str_before = self.cursor.as_str();
let token = self.cursor.advance_token();
let start = self.pos;
self.pos = self.pos + BytePos(token.len);
Expand Down Expand Up @@ -165,10 +166,7 @@ impl<'a> StringReader<'a> {
continue;
}
rustc_lexer::TokenKind::Ident => {
let sym = nfc_normalize(self.str_from(start));
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
token::Ident(sym, false)
self.ident(start)
}
rustc_lexer::TokenKind::RawIdent => {
let sym = nfc_normalize(self.str_from(start + BytePos(2)));
Expand All @@ -182,10 +180,7 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::TokenKind::UnknownPrefix => {
self.report_unknown_prefix(start);
let sym = nfc_normalize(self.str_from(start));
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
token::Ident(sym, false)
self.ident(start)
}
rustc_lexer::TokenKind::InvalidIdent
// Do not recover an identifier with emoji if the codepoint is a confusable
Expand All @@ -203,6 +198,27 @@ impl<'a> StringReader<'a> {
.push(span);
token::Ident(sym, false)
}
// split up (raw) c string literals to an ident and a string literal when edition < 2021.
rustc_lexer::TokenKind::Literal {
kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
suffix_start: _,
} if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
let prefix_len = match kind {
LiteralKind::CStr { .. } => 1,
LiteralKind::RawCStr { .. } => 2,
_ => unreachable!(),
};

// reset the state so that only the prefix ("c" or "cr")
// was consumed.
let lit_start = start + BytePos(prefix_len);
self.pos = lit_start;
self.cursor = Cursor::new(&str_before[prefix_len as usize..]);

self.report_unknown_prefix(start);
let prefix_span = self.mk_sp(start, lit_start);
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
}
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
Expand Down Expand Up @@ -317,6 +333,13 @@ impl<'a> StringReader<'a> {
}
}

/// Builds an identifier token for the source text beginning at `start`.
///
/// The text is passed through `nfc_normalize` to obtain a symbol, the
/// symbol is recorded in the session's symbol gallery together with the
/// span `start..self.pos`, and a `token::Ident` carrying that symbol
/// (with its flag set to `false`) is returned.
fn ident(&self, start: BytePos) -> TokenKind {
    let name = nfc_normalize(self.str_from(start));
    self.sess.symbol_gallery.insert(name, self.mk_sp(start, self.pos));
    token::Ident(name, false)
}

fn struct_fatal_span_char(
&self,
from_pos: BytePos,
Expand Down
14 changes: 14 additions & 0 deletions tests/ui/rfcs/rfc-3348-c-string-literals/auxiliary/count.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// force-host
// edition: 2018
// no-prefer-dynamic
#![crate_type = "proc-macro"]

extern crate proc_macro;

use proc_macro::TokenStream;
use std::str::FromStr;

/// Ignores its input and expands to an integer literal: the number of
/// top-level tokens produced by parsing the source text `c""` as a
/// `TokenStream` inside this crate.
///
/// Panics if the text cannot be parsed as a token stream, or if the
/// resulting count cannot be parsed back into a token stream.
#[proc_macro]
pub fn number_of_tokens(_: TokenStream) -> TokenStream {
    let parsed = TokenStream::from_str("c\"\"").unwrap();
    let token_count = parsed.into_iter().count();
    token_count.to_string().parse().unwrap()
}
3 changes: 1 addition & 2 deletions tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// FIXME(c_str_literals): This should be `run-pass`
// known-bug: #113333
// run-pass
// edition: 2021

#![feature(c_str_literals)]
Expand Down
25 changes: 0 additions & 25 deletions tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr

This file was deleted.

16 changes: 16 additions & 0 deletions tests/ui/rfcs/rfc-3348-c-string-literals/edition-spans.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// even if this crate is edition 2021, proc macros compiled using older
// editions should still be able to observe the pre-2021 token behavior
//
// adapted from tests/ui/rust-2021/reserved-prefixes-via-macro.rs

// edition: 2021
// check-pass

// aux-build: count.rs
extern crate count;

// The `count` aux crate is built with edition 2018, so the `c""` it parses is
// expected to come out as two tokens (the identifier `c` followed by the string
// literal `""`) rather than as a single C string literal.
const _: () = {
assert!(count::number_of_tokens!() == 2);
};

// Intentionally empty: the assertion above is evaluated at compile time.
fn main() {}
31 changes: 10 additions & 21 deletions tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
Original file line number Diff line number Diff line change
@@ -1,32 +1,21 @@
error: prefix `c` is unknown
error[E0658]: `c".."` literals are experimental
--> $DIR/gate.rs:10:5
|
LL | c"foo";
| ^ unknown prefix
| ^^^^^^
|
= note: prefixed identifiers and literals are reserved since Rust 2021
help: consider inserting whitespace here
|
LL | c "foo";
| +
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable

error: prefix `c` is unknown
error[E0658]: `c".."` literals are experimental
--> $DIR/gate.rs:13:8
|
LL | m!(c"test");
| ^ unknown prefix
|
= note: prefixed identifiers and literals are reserved since Rust 2021
help: consider inserting whitespace here
| ^^^^^^^
|
LL | m!(c "test");
| +

error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"`
--> $DIR/gate.rs:10:6
|
LL | c"foo";
| ^^^^^ expected one of 8 possible tokens
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable

error: aborting due to 3 previous errors
error: aborting due to 2 previous errors

For more information about this error, try `rustc --explain E0658`.
Binary file modified tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
Binary file not shown.
Binary file modified tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
Binary file not shown.
3 changes: 1 addition & 2 deletions tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// FIXME(c_str_literals): This should be `run-pass`
// known-bug: #113333
// run-pass
// edition: 2021

#![feature(c_str_literals)]
Expand Down
38 changes: 0 additions & 38 deletions tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr

This file was deleted.

0 comments on commit 23405bb

Please sign in to comment.