diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 92b9adf1db751..fbae496458813 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -8,7 +8,6 @@ use rustc_lexer::unescape::{ }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; -use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -39,7 +38,6 @@ pub enum LitError { InvalidFloatSuffix, NonDecimalFloat(u32), IntTooLarge(u32), - NulInCStr(Range), } impl LitKind { @@ -156,10 +154,7 @@ impl LitKind { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) @@ -179,10 +174,7 @@ impl LitKind { // can convert the symbol directly to a `Lrc` on success. let s = symbol.as_str(); let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { Ok(_) => {} Err(err) => { if err.is_fatal() { diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index abec12f52a6e6..0a632c4d12ad5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -59,6 +59,9 @@ pub enum EscapeError { /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, + // `\0` in a C string literal. + NulInCStr, + /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. UnskippedWhitespaceWarning, @@ -122,10 +125,20 @@ where { match mode { CStr => { - unescape_non_raw_common(src, mode, callback); + unescape_non_raw_common(src, mode, &mut |r, mut result| { + if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result) + }); } RawCStr => { - check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char))); + check_raw_common(src, mode, &mut |r, mut result| { + if let Ok('\0') = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result.map(CStrUnit::Char)) + }); } Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(), } diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index d515e86a18296..fb19bb996f922 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -616,6 +616,8 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||` +parse_nul_in_c_str = null characters in C string literals are not supported + parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings parse_out_of_range_hex_escape = out of range hex escape diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 34b34a1bad6c9..7dc711d96109d 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2163,6 +2163,11 @@ pub enum UnescapeError { #[subdiagnostic] suggestion: MoreThanOneCharSugg, }, + #[diag(parse_nul_in_c_str)] + NulInCStr { + #[primary_span] + span: Span, + }, } #[derive(Subdiagnostic)] diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index fbc77f2878081..3238f8e23bb0a 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -262,6 +262,9 @@ pub(crate) fn emit_unescape_error( EscapeError::LoneSlash => { dcx.emit_err(UnescapeError::LoneSlash(err_span)); } + EscapeError::NulInCStr => { + dcx.emit_err(UnescapeError::NulInCStr { span: err_span }); + } EscapeError::UnskippedWhitespaceWarning => { let (c, char_span) = last_char(); dcx.emit_warn(UnescapeError::UnskippedWhitespace { diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index 53bdef6dfa0fb..af8c962a2ed6a 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -75,8 +75,6 @@ session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be u session_not_supported = not supported -session_nul_in_c_str = null characters in C string literals are not supported - session_octal_float_literal_not_supported = octal float literal is not supported session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index 21a206798af18..e19f0fd84de56 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -6,7 +6,7 @@ use rustc_errors::{ error_code, DiagCtxt, DiagnosticBuilder, DiagnosticMessage, IntoDiagnostic, Level, MultiSpan, }; use rustc_macros::Diagnostic; -use rustc_span::{BytePos, Span, Symbol}; +use rustc_span::{Span, Symbol}; use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple}; use crate::parse::ParseSess; @@ -346,13 +346,6 @@ pub(crate) struct BinaryFloatLiteralNotSupported { pub span: Span, } -#[derive(Diagnostic)] -#[diag(session_nul_in_c_str)] -pub(crate) struct NulInCStr { - #[primary_span] - pub span: Span, -} - pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { @@ -432,12 +425,6 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: }; dcx.emit_err(IntLiteralTooLarge { span, limit }); } - LitError::NulInCStr(range) => { - let lo = BytePos(span.lo().0 + range.start as u32 + 2); - let hi = BytePos(span.lo().0 + range.end as u32 + 2); - let span = span.with_lo(lo).with_hi(hi); - dcx.emit_err(NulInCStr { span }); - } } } diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index f47ec49df1d9f..aa25f82ae1d8d 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -369,6 +369,7 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str { "non-ASCII character in byte string literal" } EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal", + EscapeError::NulInCStr => "null character in C string literal", EscapeError::UnskippedWhitespaceWarning => "", EscapeError::MultipleSkippedLinesWarning => "", } diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index 6c6916c585f1f..69dffbf79f191 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -106,6 +106,9 @@ fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, EE::NonAsciiCharInByte => { "Byte literals must not contain non-ASCII characters" } + EE::NulInCStr => { + "C strings literals must not contain null characters" + } EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped", EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape", diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs index e20ca50b88f90..2f9ca09f3a5de 100644 Binary files a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs differ diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr index ff9006f6f97f1..a05dea3ff07e6 100644 Binary files a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr differ