Skip to content

Commit 4891f02

Browse files
authored
Rollup merge of #108801 - fee1-dead-contrib:c-str, r=compiler-errors
Implement RFC 3348, `c"foo"` literals RFC: rust-lang/rfcs#3348 Tracking issue: #105723
2 parents 3502e48 + d30c668 commit 4891f02

File tree

33 files changed

+500
-153
lines changed

33 files changed

+500
-153
lines changed

Diff for: compiler/rustc_ast/src/ast.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1821,6 +1821,8 @@ pub enum LitKind {
18211821
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
18221822
/// non-utf8, and symbols only allow utf8 strings.
18231823
ByteStr(Lrc<[u8]>, StrStyle),
1824+
/// A C string (`c"foo"`). Guaranteed to contain exactly one `\0`, as its final byte.
1825+
CStr(Lrc<[u8]>, StrStyle),
18241826
/// A byte char (`b'f'`).
18251827
Byte(u8),
18261828
/// A character literal (`'a'`).
@@ -1875,6 +1877,7 @@ impl LitKind {
18751877
// unsuffixed variants
18761878
LitKind::Str(..)
18771879
| LitKind::ByteStr(..)
1880+
| LitKind::CStr(..)
18781881
| LitKind::Byte(..)
18791882
| LitKind::Char(..)
18801883
| LitKind::Int(_, LitIntType::Unsuffixed)

Diff for: compiler/rustc_ast/src/token.rs

+7
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ pub enum LitKind {
7474
StrRaw(u8), // raw string delimited by `n` hash symbols
7575
ByteStr,
7676
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
77+
CStr,
78+
CStrRaw(u8),
7779
Err,
7880
}
7981

@@ -141,6 +143,10 @@ impl fmt::Display for Lit {
141143
delim = "#".repeat(n as usize),
142144
string = symbol
143145
)?,
146+
CStr => write!(f, "c\"{symbol}\"")?,
147+
CStrRaw(n) => {
148+
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
149+
}
144150
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
145151
}
146152

@@ -170,6 +176,7 @@ impl LitKind {
170176
Float => "float",
171177
Str | StrRaw(..) => "string",
172178
ByteStr | ByteStrRaw(..) => "byte string",
179+
CStr | CStrRaw(..) => "C string",
173180
Err => "error",
174181
}
175182
}

Diff for: compiler/rustc_ast/src/util/literal.rs

+62-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
33
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
44
use crate::token::{self, Token};
5-
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
5+
use rustc_lexer::unescape::{
6+
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
7+
Mode,
8+
};
69
use rustc_span::symbol::{kw, sym, Symbol};
710
use rustc_span::Span;
11+
use std::ops::Range;
812
use std::{ascii, fmt, str};
913

1014
// Escapes a string, represented as a symbol. Reuses the original symbol,
@@ -35,6 +39,7 @@ pub enum LitError {
3539
InvalidFloatSuffix,
3640
NonDecimalFloat(u32),
3741
IntTooLarge(u32),
42+
NulInCStr(Range<usize>),
3843
}
3944

4045
impl LitKind {
@@ -158,6 +163,52 @@ impl LitKind {
158163

159164
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
160165
}
166+
token::CStr => {
167+
let s = symbol.as_str();
168+
let mut buf = Vec::with_capacity(s.len());
169+
let mut error = Ok(());
170+
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
171+
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
172+
error = Err(LitError::NulInCStr(span));
173+
}
174+
Ok(CStrUnit::Byte(b)) => buf.push(b),
175+
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
176+
Ok(CStrUnit::Char(c)) => {
177+
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
178+
}
179+
Err(err) => {
180+
if err.is_fatal() {
181+
error = Err(LitError::LexerError);
182+
}
183+
}
184+
});
185+
error?;
186+
buf.push(0);
187+
LitKind::CStr(buf.into(), StrStyle::Cooked)
188+
}
189+
token::CStrRaw(n) => {
190+
let s = symbol.as_str();
191+
let mut buf = Vec::with_capacity(s.len());
192+
let mut error = Ok(());
193+
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
194+
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
195+
error = Err(LitError::NulInCStr(span));
196+
}
197+
Ok(CStrUnit::Byte(b)) => buf.push(b),
198+
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
199+
Ok(CStrUnit::Char(c)) => {
200+
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
201+
}
202+
Err(err) => {
203+
if err.is_fatal() {
204+
error = Err(LitError::LexerError);
205+
}
206+
}
207+
});
208+
error?;
209+
buf.push(0);
210+
LitKind::CStr(buf.into(), StrStyle::Raw(n))
211+
}
161212
token::Err => LitKind::Err,
162213
})
163214
}
@@ -191,6 +242,14 @@ impl fmt::Display for LitKind {
191242
string = symbol
192243
)?;
193244
}
245+
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
246+
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
247+
}
248+
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
249+
// This can only be valid UTF-8.
250+
let symbol = str::from_utf8(bytes).unwrap();
251+
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
252+
}
194253
LitKind::Int(n, ty) => {
195254
write!(f, "{n}")?;
196255
match ty {
@@ -237,6 +296,8 @@ impl MetaItemLit {
237296
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
238297
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
239298
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
299+
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
300+
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
240301
LitKind::Byte(_) => token::Byte,
241302
LitKind::Char(_) => token::Char,
242303
LitKind::Int(..) => token::Integer,

Diff for: compiler/rustc_ast_passes/src/feature_gate.rs

+1
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) {
572572
}
573573
};
574574
}
575+
gate_all!(c_str_literals, "`c\"..\"` literals are experimental");
575576
gate_all!(
576577
if_let_guard,
577578
"`if let` guards are experimental",

Diff for: compiler/rustc_ast_pretty/src/pprust/state.rs

+4
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String {
210210
token::ByteStrRaw(n) => {
211211
format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
212212
}
213+
token::CStr => format!("c\"{symbol}\""),
214+
token::CStrRaw(n) => {
215+
format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))
216+
}
213217
token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
214218
};
215219

Diff for: compiler/rustc_builtin_macros/src/concat.rs

+4
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ pub fn expand_concat(
3232
Ok(ast::LitKind::Bool(b)) => {
3333
accumulator.push_str(&b.to_string());
3434
}
35+
Ok(ast::LitKind::CStr(..)) => {
36+
cx.span_err(e.span, "cannot concatenate a C string literal");
37+
has_errors = true;
38+
}
3539
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
3640
cx.emit_err(errors::ConcatBytestr { span: e.span });
3741
has_errors = true;

Diff for: compiler/rustc_builtin_macros/src/concat_bytes.rs

+5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ fn invalid_type_err(
1818
};
1919
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
2020
match ast::LitKind::from_token_lit(token_lit) {
21+
Ok(ast::LitKind::CStr(_, _)) => {
22+
// FIXME(c_str_literals): should concatenation of C string literals
23+
// include the nul bytes at the end?
24+
cx.span_err(span, "cannot concatenate C string literals");
25+
}
2126
Ok(ast::LitKind::Char(_)) => {
2227
let sugg =
2328
snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });

Diff for: compiler/rustc_expand/src/proc_macro_server.rs

+6
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
6161
token::StrRaw(n) => LitKind::StrRaw(n),
6262
token::ByteStr => LitKind::ByteStr,
6363
token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
64+
token::CStr => LitKind::CStr,
65+
token::CStrRaw(n) => LitKind::CStrRaw(n),
6466
token::Err => LitKind::Err,
6567
token::Bool => unreachable!(),
6668
}
@@ -78,6 +80,8 @@ impl ToInternal<token::LitKind> for LitKind {
7880
LitKind::StrRaw(n) => token::StrRaw(n),
7981
LitKind::ByteStr => token::ByteStr,
8082
LitKind::ByteStrRaw(n) => token::ByteStrRaw(n),
83+
LitKind::CStr => token::CStr,
84+
LitKind::CStrRaw(n) => token::CStrRaw(n),
8185
LitKind::Err => token::Err,
8286
}
8387
}
@@ -436,6 +440,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
436440
| token::LitKind::StrRaw(_)
437441
| token::LitKind::ByteStr
438442
| token::LitKind::ByteStrRaw(_)
443+
| token::LitKind::CStr
444+
| token::LitKind::CStrRaw(_)
439445
| token::LitKind::Err => return Err(()),
440446
token::LitKind::Integer | token::LitKind::Float => {}
441447
}

Diff for: compiler/rustc_feature/src/active.rs

+2
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,8 @@ declare_features! (
313313
(active, async_closure, "1.37.0", Some(62290), None),
314314
/// Allows async functions to be declared, implemented, and used in traits.
315315
(active, async_fn_in_trait, "1.66.0", Some(91611), None),
316+
/// Allows `c"foo"` literals.
317+
(active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None),
316318
/// Treat `extern "C"` function as nounwind.
317319
(active, c_unwind, "1.52.0", Some(74990), None),
318320
/// Allows using C-variadics.

Diff for: compiler/rustc_hir/src/lang_items.rs

+1
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ language_item_table! {
333333
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;
334334

335335
String, sym::String, string, Target::Struct, GenericRequirement::None;
336+
CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None;
336337
}
337338

338339
pub enum GenericRequirement {

Diff for: compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
13001300
opt_ty.unwrap_or_else(|| self.next_float_var())
13011301
}
13021302
ast::LitKind::Bool(_) => tcx.types.bool,
1303+
ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
1304+
tcx.lifetimes.re_static,
1305+
tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
1306+
.skip_binder(),
1307+
),
13031308
ast::LitKind::Err => tcx.ty_error_misc(),
13041309
}
13051310
}

Diff for: compiler/rustc_lexer/src/lib.rs

+57-33
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,16 @@ pub enum LiteralKind {
186186
Str { terminated: bool },
187187
/// "b"abc"", "b"abc"
188188
ByteStr { terminated: bool },
189+
/// `c"abc"`, `c"abc`
190+
CStr { terminated: bool },
189191
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
190192
/// an invalid literal.
191193
RawStr { n_hashes: Option<u8> },
192194
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
193195
/// indicates an invalid literal.
194196
RawByteStr { n_hashes: Option<u8> },
197+
/// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
198+
RawCStr { n_hashes: Option<u8> },
195199
}
196200

197201
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -357,39 +361,18 @@ impl Cursor<'_> {
357361
},
358362

359363
// Byte literal, byte string literal, raw byte string literal or identifier.
360-
'b' => match (self.first(), self.second()) {
361-
('\'', _) => {
362-
self.bump();
363-
let terminated = self.single_quoted_string();
364-
let suffix_start = self.pos_within_token();
365-
if terminated {
366-
self.eat_literal_suffix();
367-
}
368-
let kind = Byte { terminated };
369-
Literal { kind, suffix_start }
370-
}
371-
('"', _) => {
372-
self.bump();
373-
let terminated = self.double_quoted_string();
374-
let suffix_start = self.pos_within_token();
375-
if terminated {
376-
self.eat_literal_suffix();
377-
}
378-
let kind = ByteStr { terminated };
379-
Literal { kind, suffix_start }
380-
}
381-
('r', '"') | ('r', '#') => {
382-
self.bump();
383-
let res = self.raw_double_quoted_string(2);
384-
let suffix_start = self.pos_within_token();
385-
if res.is_ok() {
386-
self.eat_literal_suffix();
387-
}
388-
let kind = RawByteStr { n_hashes: res.ok() };
389-
Literal { kind, suffix_start }
390-
}
391-
_ => self.ident_or_unknown_prefix(),
392-
},
364+
'b' => self.c_or_byte_string(
365+
|terminated| ByteStr { terminated },
366+
|n_hashes| RawByteStr { n_hashes },
367+
Some(|terminated| Byte { terminated }),
368+
),
369+
370+
// c-string literal, raw c-string literal or identifier.
371+
'c' => self.c_or_byte_string(
372+
|terminated| CStr { terminated },
373+
|n_hashes| RawCStr { n_hashes },
374+
None,
375+
),
393376

394377
// Identifier (this should be checked after other variant that can
395378
// start as identifier).
@@ -553,6 +536,47 @@ impl Cursor<'_> {
553536
}
554537
}
555538

539+
/// Shared lexing routine for literals introduced by a one-letter prefix; the
/// call sites visible in this file use it for `b` and `c`.
///
/// Inspects the next two characters (`self.first()`, `self.second()`) and
/// `single_quoted` to decide what follows the prefix:
///
/// - `'`, only when `single_quoted` is `Some`: lexes a single-quoted literal
///   and builds its kind with the constructor carried in `single_quoted`.
///   That arm binds the constructor to the name `mk_kind`, shadowing the
///   `mk_kind` parameter for the duration of the arm.
/// - `"`: lexes a double-quoted literal and builds its kind with `mk_kind`.
/// - `r"` or `r#`: lexes a raw double-quoted literal and builds its kind with
///   `mk_kind_raw`, which receives `res.ok()`, i.e. `None` when
///   `raw_double_quoted_string` returned an error.
/// - anything else (including `'` when `single_quoted` is `None`): defers to
///   `ident_or_unknown_prefix`.
///
/// The `bool` handed to `mk_kind` / `single_quoted` is the `terminated` flag
/// returned by the corresponding `*_quoted_string` call. A literal suffix is
/// eaten only when the literal was terminated (or, for raw literals, when
/// lexing succeeded); `suffix_start` is recorded before that happens.
///
/// NOTE(review): the prefix character itself is presumably already consumed
/// by the caller, and the `2` passed to `raw_double_quoted_string` presumably
/// is the length of the two-character raw prefix (`br` / `cr`) — confirm
/// against the caller and `raw_double_quoted_string`.
fn c_or_byte_string(
540+
&mut self,
541+
mk_kind: impl FnOnce(bool) -> LiteralKind,
542+
mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind,
543+
single_quoted: Option<fn(bool) -> LiteralKind>,
544+
) -> TokenKind {
545+
match (self.first(), self.second(), single_quoted) {
546+
('\'', _, Some(mk_kind)) => {
547+
self.bump();
548+
let terminated = self.single_quoted_string();
549+
let suffix_start = self.pos_within_token();
550+
if terminated {
551+
self.eat_literal_suffix();
552+
}
553+
let kind = mk_kind(terminated);
554+
Literal { kind, suffix_start }
555+
}
556+
('"', _, _) => {
557+
self.bump();
558+
let terminated = self.double_quoted_string();
559+
let suffix_start = self.pos_within_token();
560+
if terminated {
561+
self.eat_literal_suffix();
562+
}
563+
let kind = mk_kind(terminated);
564+
Literal { kind, suffix_start }
565+
}
566+
('r', '"', _) | ('r', '#', _) => {
567+
self.bump();
568+
let res = self.raw_double_quoted_string(2);
569+
let suffix_start = self.pos_within_token();
570+
if res.is_ok() {
571+
self.eat_literal_suffix();
572+
}
573+
let kind = mk_kind_raw(res.ok());
574+
Literal { kind, suffix_start }
575+
}
576+
_ => self.ident_or_unknown_prefix(),
577+
}
578+
}
579+
556580
fn number(&mut self, first_digit: char) -> LiteralKind {
557581
debug_assert!('0' <= self.prev() && self.prev() <= '9');
558582
let mut base = Base::Decimal;

0 commit comments

Comments
 (0)