diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index b5dba0713bfa0..e3ac8a8784a7c 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1821,6 +1821,8 @@ pub enum LitKind { /// A byte string (`b"foo"`). Not stored as a symbol because it might be /// non-utf8, and symbols only allow utf8 strings. ByteStr(Lrc<[u8]>, StrStyle), + /// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. + CStr(Lrc<[u8]>, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). @@ -1875,6 +1877,7 @@ impl LitKind { // unsuffixed variants LitKind::Str(..) | LitKind::ByteStr(..) + | LitKind::CStr(..) | LitKind::Byte(..) | LitKind::Char(..) | LitKind::Int(_, LitIntType::Unsuffixed) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index f947ae4d05732..42b843482a32b 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -74,6 +74,8 @@ pub enum LitKind { StrRaw(u8), // raw string delimited by `n` hash symbols ByteStr, ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), Err, } @@ -141,6 +143,10 @@ impl fmt::Display for Lit { delim = "#".repeat(n as usize), string = symbol )?, + CStr => write!(f, "c\"{symbol}\"")?, + CStrRaw(n) => { + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))? + } Integer | Float | Bool | Err => write!(f, "{symbol}")?, } @@ -170,6 +176,7 @@ impl LitKind { Float => "float", Str | StrRaw(..) => "string", ByteStr | ByteStrRaw(..) => "byte string", + CStr | CStrRaw(..) => "C string", Err => "error", } } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 74b842ac96eac..15a54fe13d0b7 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,9 +2,13 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; -use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; +use rustc_lexer::unescape::{ + byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, + Mode, +}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; +use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -35,6 +39,7 @@ pub enum LitError { InvalidFloatSuffix, NonDecimalFloat(u32), IntTooLarge(u32), + NulInCStr(Range), } impl LitKind { @@ -158,6 +163,52 @@ impl LitKind { LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) } + token::CStr => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::CStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(0); + LitKind::CStr(buf.into(), StrStyle::Cooked) + } + token::CStrRaw(n) => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(0); + LitKind::CStr(buf.into(), StrStyle::Raw(n)) + } token::Err => LitKind::Err, }) } @@ -191,6 +242,14 @@ impl fmt::Display for LitKind { string = symbol )?; } + LitKind::CStr(ref bytes, StrStyle::Cooked) => { + write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))? + } + LitKind::CStr(ref bytes, StrStyle::Raw(n)) => { + // This can only be valid UTF-8. + let symbol = str::from_utf8(bytes).unwrap(); + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?; + } LitKind::Int(n, ty) => { write!(f, "{n}")?; match ty { @@ -237,6 +296,8 @@ impl MetaItemLit { LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n), LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr, LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n), + LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr, + LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n), LitKind::Byte(_) => token::Byte, LitKind::Char(_) => token::Char, LitKind::Int(..) => token::Integer, diff --git a/compiler/rustc_ast_passes/src/feature_gate.rs b/compiler/rustc_ast_passes/src/feature_gate.rs index a46fe9e898fc6..b960671bf6e15 100644 --- a/compiler/rustc_ast_passes/src/feature_gate.rs +++ b/compiler/rustc_ast_passes/src/feature_gate.rs @@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) { } }; } + gate_all!(c_str_literals, "`c\"..\"` literals are experimental"); gate_all!( if_let_guard, "`if let` guards are experimental", diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index ae346510ccc82..3f80728a2606b 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -210,6 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String { token::ByteStrRaw(n) => { format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol) } + token::CStr => format!("c\"{symbol}\""), + token::CStrRaw(n) => { + format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize)) + } token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(), }; diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index b92964d03e9f9..50e88ae2eeede 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -32,6 +32,10 @@ pub fn expand_concat( Ok(ast::LitKind::Bool(b)) => { accumulator.push_str(&b.to_string()); } + Ok(ast::LitKind::CStr(..)) => { + cx.span_err(e.span, "cannot concatenate a C string literal"); + has_errors = true; + } Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { cx.emit_err(errors::ConcatBytestr { span: e.span }); has_errors = true; diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index ba639c0a9fe3c..5ef35af0a059a 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -18,6 +18,11 @@ fn invalid_type_err( }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); match ast::LitKind::from_token_lit(token_lit) { + Ok(ast::LitKind::CStr(_, _)) => { + // FIXME(c_str_literals): should concatenation of C string literals + // include the null bytes in the end? + cx.span_err(span, "cannot concatenate C string literals"); + } Ok(ast::LitKind::Char(_)) => { let sugg = snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet }); diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 1e7d07bc22d52..891e84a2f3071 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -61,6 +61,8 @@ impl FromInternal for LitKind { token::StrRaw(n) => LitKind::StrRaw(n), token::ByteStr => LitKind::ByteStr, token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + token::CStr => LitKind::CStr, + token::CStrRaw(n) => LitKind::CStrRaw(n), token::Err => LitKind::Err, token::Bool => unreachable!(), } @@ -78,6 +80,8 @@ impl ToInternal for LitKind { LitKind::StrRaw(n) => token::StrRaw(n), LitKind::ByteStr => token::ByteStr, LitKind::ByteStrRaw(n) => token::ByteStrRaw(n), + LitKind::CStr => token::CStr, + LitKind::CStrRaw(n) => token::CStrRaw(n), LitKind::Err => token::Err, } } @@ -436,6 +440,8 @@ impl server::FreeFunctions for Rustc<'_, '_> { | token::LitKind::StrRaw(_) | token::LitKind::ByteStr | token::LitKind::ByteStrRaw(_) + | token::LitKind::CStr + | token::LitKind::CStrRaw(_) | token::LitKind::Err => return Err(()), token::LitKind::Integer | token::LitKind::Float => {} } diff --git a/compiler/rustc_feature/src/active.rs b/compiler/rustc_feature/src/active.rs index dc5dc4608a07f..7e7df0e958438 100644 --- a/compiler/rustc_feature/src/active.rs +++ b/compiler/rustc_feature/src/active.rs @@ -313,6 +313,8 @@ declare_features! ( (active, async_closure, "1.37.0", Some(62290), None), /// Allows async functions to be declared, implemented, and used in traits. (active, async_fn_in_trait, "1.66.0", Some(91611), None), + /// Allows `c"foo"` literals. + (active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None), /// Treat `extern "C"` function as nounwind. (active, c_unwind, "1.52.0", Some(74990), None), /// Allows using C-variadics. diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index 0a8bd1d612389..1f08befb180c9 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -333,6 +333,7 @@ language_item_table! { RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None; String, sym::String, string, Target::Struct, GenericRequirement::None; + CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None; } pub enum GenericRequirement { diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index 78bd489a44b6e..4b8fc7303a20c 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { opt_ty.unwrap_or_else(|| self.next_float_var()) } ast::LitKind::Bool(_) => tcx.types.bool, + ast::LitKind::CStr(_, _) => tcx.mk_imm_ref( + tcx.lifetimes.re_static, + tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span))) + .skip_binder(), + ), ast::LitKind::Err => tcx.ty_error_misc(), } } diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index b3f4b5cd5e5a0..c07dc19a0ac3a 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -186,12 +186,16 @@ pub enum LiteralKind { Str { terminated: bool }, /// "b"abc"", "b"abc" ByteStr { terminated: bool }, + /// `c"abc"`, `c"abc` + CStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates /// an invalid literal. RawStr { n_hashes: Option }, /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option }, + /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal. + RawCStr { n_hashes: Option }, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -357,39 +361,18 @@ impl Cursor<'_> { }, // Byte literal, byte string literal, raw byte string literal or identifier. - 'b' => match (self.first(), self.second()) { - ('\'', _) => { - self.bump(); - let terminated = self.single_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Byte { terminated }; - Literal { kind, suffix_start } - } - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = ByteStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let res = self.raw_double_quoted_string(2); - let suffix_start = self.pos_within_token(); - if res.is_ok() { - self.eat_literal_suffix(); - } - let kind = RawByteStr { n_hashes: res.ok() }; - Literal { kind, suffix_start } - } - _ => self.ident_or_unknown_prefix(), - }, + 'b' => self.c_or_byte_string( + |terminated| ByteStr { terminated }, + |n_hashes| RawByteStr { n_hashes }, + Some(|terminated| Byte { terminated }), + ), + + // c-string literal, raw c-string literal or identifier. + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), // Identifier (this should be checked after other variant that can // start as identifier). @@ -553,6 +536,47 @@ impl Cursor<'_> { } } + fn c_or_byte_string( + &mut self, + mk_kind: impl FnOnce(bool) -> LiteralKind, + mk_kind_raw: impl FnOnce(Option) -> LiteralKind, + single_quoted: Option LiteralKind>, + ) -> TokenKind { + match (self.first(), self.second(), single_quoted) { + ('\'', _, Some(mk_kind)) => { + self.bump(); + let terminated = self.single_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('"', _, _) => { + self.bump(); + let terminated = self.double_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('r', '"', _) | ('r', '#', _) => { + self.bump(); + let res = self.raw_double_quoted_string(2); + let suffix_start = self.pos_within_token(); + if res.is_ok() { + self.eat_literal_suffix(); + } + let kind = mk_kind_raw(res.ok()); + Literal { kind, suffix_start } + } + _ => self.ident_or_unknown_prefix(), + } + } + fn number(&mut self, first_digit: char) -> LiteralKind { debug_assert!('0' <= self.prev() && self.prev() <= '9'); let mut base = Base::Decimal; diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index bb4d91247b81d..c9ad54d8d9806 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -86,10 +86,45 @@ where let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte); callback(0..(src.len() - chars.as_str().len()), res); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), + Mode::Str | Mode::ByteStr => unescape_str_common(src, mode, callback), + Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } + Mode::CStr | Mode::RawCStr => unreachable!(), + } +} + +/// A unit within CStr. Must not be a nul character. +pub enum CStrUnit { + Byte(u8), + Char(char), +} + +impl From for CStrUnit { + fn from(value: u8) -> Self { + CStrUnit::Byte(value) + } +} + +impl From for CStrUnit { + fn from(value: char) -> Self { + CStrUnit::Char(value) + } +} + +pub fn unescape_c_string(src: &str, mode: Mode, callback: &mut F) +where + F: FnMut(Range, Result), +{ + if mode == Mode::RawCStr { + unescape_raw_str_or_raw_byte_str( + src, + mode.characters_should_be_ascii(), + &mut |r, result| callback(r, result.map(CStrUnit::Char)), + ); + } else { + unescape_str_common(src, mode, callback); } } @@ -114,34 +149,69 @@ pub enum Mode { ByteStr, RawStr, RawByteStr, + CStr, + RawCStr, } impl Mode { pub fn in_double_quotes(self) -> bool { match self { - Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true, + Mode::Str + | Mode::ByteStr + | Mode::RawStr + | Mode::RawByteStr + | Mode::CStr + | Mode::RawCStr => true, Mode::Char | Mode::Byte => false, } } - pub fn is_byte(self) -> bool { + /// Non-byte literals should have `\xXX` escapes that are within the ASCII range. + pub fn ascii_escapes_should_be_ascii(self) -> bool { + match self { + Mode::Char | Mode::Str | Mode::RawStr => true, + Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => false, + } + } + + /// Whether characters within the literal must be within the ASCII range + pub fn characters_should_be_ascii(self) -> bool { + match self { + Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, + Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false, + } + } + + /// Byte literals do not allow unicode escape. + pub fn is_unicode_escape_disallowed(self) -> bool { match self { Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, - Mode::Char | Mode::Str | Mode::RawStr => false, + Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false, + } + } + + pub fn prefix_noraw(self) -> &'static str { + match self { + Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b", + Mode::CStr | Mode::RawCStr => "c", + Mode::Char | Mode::Str | Mode::RawStr => "", } } } -fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result { +fn scan_escape + From>( + chars: &mut Chars<'_>, + mode: Mode, +) -> Result { // Previous character was '\\', unescape what follows. let res = match chars.next().ok_or(EscapeError::LoneSlash)? { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', + '"' => b'"', + 'n' => b'\n', + 'r' => b'\r', + 't' => b'\t', + '\\' => b'\\', + '\'' => b'\'', + '0' => b'\0', 'x' => { // Parse hexadecimal character code. @@ -154,76 +224,78 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result { - // We've parsed '\u', now we have to parse '{..}'. + 'u' => return scan_unicode(chars, mode.is_unicode_escape_disallowed()).map(Into::into), + _ => return Err(EscapeError::InvalidEscape), + }; + Ok(res.into()) +} + +fn scan_unicode( + chars: &mut Chars<'_>, + is_unicode_escape_disallowed: bool, +) -> Result { + // We've parsed '\u', now we have to parse '{..}'. - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } + if chars.next() != Some('{') { + return Err(EscapeError::NoBraceInUnicodeEscape); + } - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if is_byte { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or_else(|| { - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - })?; - } - Some(c) => { - let digit: u32 = - c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; + // First character must be a hexadecimal digit. + let mut n_digits = 1; + let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { + '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), + '}' => return Err(EscapeError::EmptyUnicodeEscape), + c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, + }; + + // First character is valid, now parse the rest of the number + // and closing brace. + loop { + match chars.next() { + None => return Err(EscapeError::UnclosedUnicodeEscape), + Some('_') => continue, + Some('}') => { + if n_digits > 6 { + return Err(EscapeError::OverlongUnicodeEscape); + } + + // Incorrect syntax has higher priority for error reporting + // than unallowed value for a literal. + if is_unicode_escape_disallowed { + return Err(EscapeError::UnicodeEscapeInByte); + } + + break std::char::from_u32(value).ok_or_else(|| { + if value > 0x10FFFF { + EscapeError::OutOfRangeUnicodeEscape + } else { + EscapeError::LoneSurrogateUnicodeEscape } - }; + }); } - } - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(res) + Some(c) => { + let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; + n_digits += 1; + if n_digits > 6 { + // Stop updating value since we're sure that it's incorrect already. + continue; + } + value = value * 16 + digit; + } + }; + } } #[inline] -fn ascii_check(c: char, is_byte: bool) -> Result { - if is_byte && !c.is_ascii() { +fn ascii_check(c: char, characters_should_be_ascii: bool) -> Result { + if characters_should_be_ascii && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { @@ -234,7 +306,7 @@ fn ascii_check(c: char, is_byte: bool) -> Result { fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result { let c = chars.next().ok_or(EscapeError::ZeroChars)?; let res = match c { - '\\' => scan_escape(chars, is_byte), + '\\' => scan_escape(chars, if is_byte { Mode::Byte } else { Mode::Char }), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), _ => ascii_check(c, is_byte), @@ -247,9 +319,9 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result(src: &str, is_byte: bool, callback: &mut F) +fn unescape_str_common + From>(src: &str, mode: Mode, callback: &mut F) where - F: FnMut(Range, Result), + F: FnMut(Range, Result), { let mut chars = src.chars(); @@ -266,47 +338,49 @@ where // if unescaped '\' character is followed by '\n'. // For details see [Rust language reference] // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, callback); + skip_ascii_whitespace(&mut chars, start, &mut |range, err| { + callback(range, Err(err)) + }); continue; } - _ => scan_escape(&mut chars, is_byte), + _ => scan_escape::(&mut chars, mode), } } - '\n' => Ok('\n'), - '\t' => Ok('\t'), + '\n' => Ok(b'\n'.into()), + '\t' => Ok(b'\t'.into()), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, is_byte), + _ => ascii_check(c, mode.characters_should_be_ascii()).map(Into::into), }; let end = src.len() - chars.as_str().len(); - callback(start..end, res); + callback(start..end, res.map(Into::into)); } +} - fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) - where - F: FnMut(Range, Result), - { - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning)); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().nth(0) { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning)); - } +fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) +where + F: FnMut(Range, EscapeError), +{ + let tail = chars.as_str(); + let first_non_space = tail + .bytes() + .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') + .unwrap_or(tail.len()); + if tail[1..first_non_space].contains('\n') { + // The +1 accounts for the escaping slash. + let end = start + first_non_space + 1; + callback(start..end, EscapeError::MultipleSkippedLinesWarning); + } + let tail = &tail[first_non_space..]; + if let Some(c) = tail.chars().nth(0) { + if c.is_whitespace() { + // For error reporting, we would like the span to contain the character that was not + // skipped. The +1 is necessary to account for the leading \ that started the escape. + let end = start + first_non_space + c.len_utf8() + 1; + callback(start..end, EscapeError::UnskippedWhitespaceWarning); } - *chars = tail.chars(); } + *chars = tail.chars(); } /// Takes a contents of a string literal (without quotes) and produces a diff --git a/compiler/rustc_mir_build/src/build/expr/as_constant.rs b/compiler/rustc_mir_build/src/build/expr/as_constant.rs index fbcfd43372433..59549435233c5 100644 --- a/compiler/rustc_mir_build/src/build/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/build/expr/as_constant.rs @@ -146,6 +146,12 @@ pub(crate) fn lit_to_mir_constant<'tcx>( let id = tcx.allocate_bytes(data); ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) } + (ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if Some(def.did()) == tcx.lang_items().c_str()) => + { + let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]); + let allocation = tcx.mk_const_alloc(allocation); + ConstValue::Slice { data: allocation, start: 0, end: data.len() } + } (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => { ConstValue::Scalar(Scalar::from_uint(*n, Size::from_bytes(1))) } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index b1c0dedd3c7dc..51e904890028a 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, Mode}; +use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_session::lint::builtin::{ @@ -204,6 +206,9 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind { + self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos)); + } let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { @@ -415,6 +420,16 @@ impl<'a> StringReader<'a> { } self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } + rustc_lexer::LiteralKind::CStr { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), end), + "unterminated C string", + error_code!(E0767), + ) + } + self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); @@ -433,6 +448,15 @@ impl<'a> StringReader<'a> { self.report_raw_str_error(start, 2); } } + rustc_lexer::LiteralKind::RawCStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + let kind = token::CStrRaw(n_hashes); + self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + } else { + self.report_raw_str_error(start, 2); + } + } rustc_lexer::LiteralKind::Int { base, empty_int } => { if empty_int { let span = self.mk_sp(start, end); @@ -648,7 +672,7 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_quoted( + fn cook_common( &self, kind: token::LitKind, mode: Mode, @@ -656,12 +680,13 @@ impl<'a> StringReader<'a> { end: BytePos, prefix_len: u32, postfix_len: u32, + unescape: fn(&str, Mode, &mut dyn FnMut(Range, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape::unescape_literal(lit_content, mode, &mut |range, result| { + unescape(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { let span_with_quotes = self.mk_sp(start, end); @@ -692,6 +717,38 @@ impl<'a> StringReader<'a> { (token::Err, self.symbol_from_to(start, end)) } } + + fn cook_quoted( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { + unescape::unescape_literal(src, mode, &mut |span, result| { + callback(span, result.map(drop)) + }) + }) + } + + fn cook_c_string( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { + unescape::unescape_c_string(src, mode, &mut |span, result| { + callback(span, result.map(drop)) + }) + }) + } } pub fn nfc_normalize(string: &str) -> Symbol { diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index d8bcf816fb263..eb9625f923ab9 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error( } }; let sugg = sugg.unwrap_or_else(|| { - let is_byte = mode.is_byte(); - let prefix = if is_byte { "b" } else { "" }; + let prefix = mode.prefix_noraw(); let mut escaped = String::with_capacity(lit.len()); let mut chrs = lit.chars().peekable(); while let Some(first) = chrs.next() { @@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error( }; } let sugg = format!("{prefix}\"{escaped}\""); - MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg } + MoreThanOneCharSugg::Quotes { + span: span_with_quotes, + is_byte: mode == Mode::Byte, + sugg, + } }); handler.emit_err(UnescapeError::MoreThanOneChar { span: span_with_quotes, @@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error( char_span, escaped_sugg: c.escape_default().to_string(), escaped_msg: escaped_char(c), - byte: mode.is_byte(), + byte: mode == Mode::Byte, }); } EscapeError::BareCarriageReturn => { @@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error( EscapeError::InvalidEscape => { let (c, span) = last_char(); - let label = - if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; + let label = if mode == Mode::Byte || mode == Mode::ByteStr { + "unknown byte escape" + } else { + "unknown character escape" + }; let ec = escaped_char(c); let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_byte() { + if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_byte() { + if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index f58f8919e5c9d..61396ee0d4aec 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1870,6 +1870,7 @@ impl<'a> Parser<'a> { let recovered = self.recover_after_dot(); let token = recovered.as_ref().unwrap_or(&self.token); let span = token.span; + token::Lit::from_token(token).map(|token_lit| { self.bump(); (token_lit, span) diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index c897275bee2b7..a8fe560d1a760 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -101,3 +101,5 @@ session_invalid_int_literal_width = invalid width `{$width}` for integer literal .help = valid widths are 8, 16, 32, 64 and 128 session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} + +session_nul_in_c_str = null characters in C string literals are not supported diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index 0df62c2064ee8..546c0fa8e03e3 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -6,7 +6,7 @@ use rustc_ast::token; use rustc_ast::util::literal::LitError; use rustc_errors::{error_code, DiagnosticMessage, EmissionGuarantee, IntoDiagnostic, MultiSpan}; use rustc_macros::Diagnostic; -use rustc_span::{Span, Symbol}; +use rustc_span::{BytePos, Span, Symbol}; use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple}; #[derive(Diagnostic)] @@ -323,6 +323,13 @@ pub(crate) struct BinaryFloatLiteralNotSupported { pub span: Span, } +#[derive(Diagnostic)] +#[diag(session_nul_in_c_str)] +pub(crate) struct NulInCStr { + #[primary_span] + pub span: Span, +} + pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { @@ -401,6 +408,12 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: }; sess.emit_err(IntLiteralTooLarge { span, limit }); } + LitError::NulInCStr(range) => { + let lo = BytePos(span.lo().0 + range.start as u32 + 2); + let hi = BytePos(span.lo().0 + range.end as u32 + 2); + let span = span.with_lo(lo).with_hi(hi); + sess.emit_err(NulInCStr { span }); + } } } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 1140a922f9fd1..58015d5d5026c 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -441,6 +441,7 @@ symbols! { bridge, bswap, c_str, + c_str_literals, c_unwind, c_variadic, c_void, diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index bd2b2c36c4315..07b11814f965f 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -79,9 +79,9 @@ use crate::str; /// /// [str]: prim@str "str" #[derive(Hash)] -#[cfg_attr(not(test), rustc_diagnostic_item = "CStr")] #[stable(feature = "core_c_str", since = "1.64.0")] #[rustc_has_incoherent_inherent_impls] +#[cfg_attr(not(bootstrap), lang = "CStr")] // FIXME: // `fn from` in `impl From<&CStr> for Box` current implementation relies // on `CStr` being layout-compatible with `[u8]`. diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index 54b11c543f162..caecda1bc63fd 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -337,6 +337,8 @@ pub enum LitKind { StrRaw(u8), ByteStr, ByteStrRaw(u8), + CStr, + CStrRaw(u8), Err, } @@ -350,6 +352,8 @@ rpc_encode_decode!( StrRaw(n), ByteStr, ByteStrRaw(n), + CStr, + CStrRaw(n), Err, } ); diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 946c85a205f5a..c94968b4817cb 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -811,7 +811,9 @@ impl<'src> Classifier<'src> { | LiteralKind::Str { .. } | LiteralKind::ByteStr { .. } | LiteralKind::RawStr { .. } - | LiteralKind::RawByteStr { .. } => Class::String, + | LiteralKind::RawByteStr { .. } + | LiteralKind::CStr { .. } + | LiteralKind::RawCStr { .. } => Class::String, // Number literals. LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number, }, diff --git a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs index 158e6caa4de54..a48f4c77f857f 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs @@ -284,6 +284,7 @@ impl<'a> NormalizedPat<'a> { LitKind::Str(sym, _) => Self::LitStr(sym), LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::Byte(val) => Self::LitInt(val.into()), + LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Int(val, _) => Self::LitInt(val), LitKind::Bool(val) => Self::LitBool(val), diff --git a/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs b/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs index 03324c66e8efc..2f2e84fa35a12 100644 --- a/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs +++ b/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs @@ -1,11 +1,11 @@ use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::source::snippet_with_context; -use clippy_utils::ty::is_type_diagnostic_item; +use clippy_utils::ty::{is_type_diagnostic_item, is_type_lang_item}; use clippy_utils::visitors::is_expr_unsafe; use clippy_utils::{get_parent_node, match_libc_symbol}; use if_chain::if_chain; use rustc_errors::Applicability; -use rustc_hir::{Block, BlockCheckMode, Expr, ExprKind, Node, UnsafeSource}; +use rustc_hir::{Block, BlockCheckMode, Expr, ExprKind, LangItem, Node, UnsafeSource}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::{declare_lint_pass, declare_tool_lint}; use rustc_span::symbol::sym; @@ -67,7 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for StrlenOnCStrings { let val_name = snippet_with_context(cx, self_arg.span, ctxt, "..", &mut app).0; let method_name = if is_type_diagnostic_item(cx, ty, sym::cstring_type) { "as_bytes" - } else if is_type_diagnostic_item(cx, ty, sym::CStr) { + } else if is_type_lang_item(cx, ty, LangItem::CStr) { "to_bytes" } else { return; diff --git a/src/tools/clippy/clippy_lints/src/utils/author.rs b/src/tools/clippy/clippy_lints/src/utils/author.rs index 01927b6b5f10d..f75dff46624e4 100644 --- a/src/tools/clippy/clippy_lints/src/utils/author.rs +++ b/src/tools/clippy/clippy_lints/src/utils/author.rs @@ -304,6 +304,11 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> { kind!("ByteStr(ref {vec})"); chain!(self, "let [{:?}] = **{vec}", vec.value); }, + LitKind::CStr(ref vec, _) => { + bind!(self, vec); + kind!("CStr(ref {vec})"); + chain!(self, "let [{:?}] = **{vec}", vec.value); + } LitKind::Str(s, _) => { bind!(self, s); kind!("Str({s}, _)"); diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs index 99bfc4b5717c8..7c7ec6d334d9b 100644 --- a/src/tools/clippy/clippy_utils/src/consts.rs +++ b/src/tools/clippy/clippy_utils/src/consts.rs @@ -211,6 +211,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option>) -> Constant { LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Byte(b) => Constant::Int(u128::from(b)), LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)), + LitKind::CStr(ref s, _) => Constant::Binary(Lrc::clone(s)), LitKind::Char(c) => Constant::Char(c), LitKind::Int(n, _) => Constant::Int(n), LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty { diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs new file mode 100644 index 0000000000000..e4b07ab8108e0 --- /dev/null +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs @@ -0,0 +1,7 @@ +// run-pass + +#![feature(c_str_literals)] + +fn main() { + assert_eq!(b"test\0", c"test".to_bytes_with_nul()); +} diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs new file mode 100644 index 0000000000000..b27da26ed23bb --- /dev/null +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs @@ -0,0 +1,13 @@ +// gate-test-c_str_literals + +macro_rules! m { + ($t:tt) => {} +} + +fn main() { + c"foo"; + //~^ ERROR: `c".."` literals are experimental + + m!(c"test"); + //~^ ERROR: `c".."` literals are experimental +} diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr new file mode 100644 index 0000000000000..bc0c537aada83 --- /dev/null +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr @@ -0,0 +1,21 @@ +error[E0658]: `c".."` literals are experimental + --> $DIR/gate.rs:8:5 + | +LL | c"foo"; + | ^^^^^^ + | + = note: see issue #105723 for more information + = help: add `#![feature(c_str_literals)]` to the crate attributes to enable + +error[E0658]: `c".."` literals are experimental + --> $DIR/gate.rs:11:8 + | +LL | m!(c"test"); + | ^^^^^^^ + | + = note: see issue #105723 for more information + = help: add `#![feature(c_str_literals)]` to the crate attributes to enable + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs new file mode 100644 index 0000000000000..7bc6097f124aa Binary files /dev/null and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs differ diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr new file mode 100644 index 0000000000000..ff9006f6f97f1 Binary files /dev/null and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr differ diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs new file mode 100644 index 0000000000000..82e8e2090d7db --- /dev/null +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs @@ -0,0 +1,10 @@ +// run-pass + +#![feature(c_str_literals)] + +fn main() { + assert_eq!( + c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), + &[0xEF, 0x80, 0xF0, 0x9F, 0xA6, 0x80, 0xF0, 0x9F, 0xA6, 0x80, 0x00], + ); +}