Skip to content

Commit 7f99c7d

Browse files
committed
Add invalid_from_utf8 analogous to invalid_from_utf8_unchecked
1 parent a0612d9 commit 7f99c7d

File tree

7 files changed

+169
-21
lines changed

7 files changed

+169
-21
lines changed

compiler/rustc_lint/messages.ftl

+4
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
304304
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
305305
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
306306
307+
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
308+
lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error
309+
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
310+
307311
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
308312
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
309313
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes

compiler/rustc_lint/src/invalid_from_utf8.rs

+41-8
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use rustc_hir::{Expr, ExprKind};
55
use rustc_span::source_map::Spanned;
66
use rustc_span::sym;
77

8-
use crate::lints::InvalidFromUtf8UncheckedDiag;
8+
use crate::lints::InvalidFromUtf8Diag;
99
use crate::{LateContext, LateLintPass, LintContext};
1010

1111
declare_lint! {
@@ -33,23 +33,56 @@ declare_lint! {
3333
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
3434
}
3535

36-
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]);
36+
declare_lint! {
37+
/// The `invalid_from_utf8` lint checks for calls to
38+
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
39+
/// with an invalid UTF-8 literal.
40+
///
41+
/// ### Example
42+
///
43+
/// ```rust
44+
/// # #[allow(unused)]
45+
/// std::str::from_utf8(b"Ru\x82st");
46+
/// ```
47+
///
48+
/// {{produces}}
49+
///
50+
/// ### Explanation
51+
///
52+
/// Calling these functions with such a literal always returns an error, as documented
53+
/// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
54+
pub INVALID_FROM_UTF8,
55+
Warn,
56+
"using a non UTF-8 literal in `std::str::from_utf8`"
57+
}
58+
59+
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);
3760

3861
impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
3962
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
4063
if let ExprKind::Call(path, [arg]) = expr.kind
4164
&& let ExprKind::Path(ref qpath) = path.kind
4265
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
4366
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
44-
&& [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
67+
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
68+
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
4569
{
4670
let lint = |utf8_error: Utf8Error| {
71+
let label = arg.span;
4772
let method = diag_item.as_str().strip_prefix("str_").unwrap();
48-
cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag {
49-
method: format!("std::str::{method}"),
50-
valid_up_to: utf8_error.valid_up_to(),
51-
label: arg.span,
52-
})
73+
let method = format!("std::str::{method}");
74+
let valid_up_to = utf8_error.valid_up_to();
75+
let is_unchecked_variant = diag_item.as_str().contains("unchecked");
76+
77+
cx.emit_spanned_lint(
78+
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
79+
expr.span,
80+
if is_unchecked_variant {
81+
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
82+
} else {
83+
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
84+
}
85+
)
5386
};
5487

5588
match &arg.kind {

compiler/rustc_lint/src/lints.rs

+15-6
Original file line numberDiff line numberDiff line change
@@ -701,12 +701,21 @@ pub struct ForgetCopyDiag<'a> {
701701

702702
// invalid_from_utf8.rs
703703
#[derive(LintDiagnostic)]
704-
#[diag(lint_invalid_from_utf8_unchecked)]
705-
pub struct InvalidFromUtf8UncheckedDiag {
706-
pub method: String,
707-
pub valid_up_to: usize,
708-
#[label]
709-
pub label: Span,
704+
pub enum InvalidFromUtf8Diag {
705+
#[diag(lint_invalid_from_utf8_unchecked)]
706+
Unchecked {
707+
method: String,
708+
valid_up_to: usize,
709+
#[label]
710+
label: Span,
711+
},
712+
#[diag(lint_invalid_from_utf8_checked)]
713+
Checked {
714+
method: String,
715+
valid_up_to: usize,
716+
#[label]
717+
label: Span,
718+
},
710719
}
711720

712721
// hidden_unicode_codepoints.rs

compiler/rustc_span/src/symbol.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,8 @@ symbols! {
14541454
stop_after_dataflow,
14551455
store,
14561456
str,
1457+
str_from_utf8,
1458+
str_from_utf8_mut,
14571459
str_from_utf8_unchecked,
14581460
str_from_utf8_unchecked_mut,
14591461
str_split_whitespace,

library/core/src/str/converts.rs

+2
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ use super::Utf8Error;
8484
#[stable(feature = "rust1", since = "1.0.0")]
8585
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
8686
#[rustc_allow_const_fn_unstable(str_internals)]
87+
#[rustc_diagnostic_item = "str_from_utf8"]
8788
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
8889
// FIXME: This should use `?` again, once it's `const`
8990
match run_utf8_validation(v) {
@@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
127128
/// errors that can be returned.
128129
#[stable(feature = "str_mut_extras", since = "1.20.0")]
129130
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
131+
#[rustc_diagnostic_item = "str_from_utf8_mut"]
130132
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
131133
// This should use `?` again, once it's `const`
132134
match run_utf8_validation(v) {

tests/ui/lint/invalid_from_utf8.rs

+44
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#![feature(concat_bytes)]
44
#![warn(invalid_from_utf8_unchecked)]
5+
#![warn(invalid_from_utf8)]
56

67
pub fn from_utf8_unchecked_mut() {
78
// Valid
@@ -46,4 +47,47 @@ pub fn from_utf8_unchecked() {
4647
}
4748
}
4849

50+
pub fn from_utf8_mut() {
51+
// Valid
52+
{
53+
std::str::from_utf8_mut(&mut [99, 108, 105, 112, 112, 121]);
54+
std::str::from_utf8_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']);
55+
56+
let x = 0xa0;
57+
std::str::from_utf8_mut(&mut [0xc0, x]);
58+
}
59+
60+
// Invalid
61+
{
62+
std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
63+
//~^ WARN calls to `std::str::from_utf8_mut`
64+
std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
65+
//~^ WARN calls to `std::str::from_utf8_mut`
66+
}
67+
}
68+
69+
pub fn from_utf8() {
70+
// Valid
71+
{
72+
std::str::from_utf8(&[99, 108, 105, 112, 112, 121]);
73+
std::str::from_utf8(&[b'c', b'l', b'i', b'p', b'p', b'y']);
74+
std::str::from_utf8(b"clippy");
75+
76+
let x = 0xA0;
77+
std::str::from_utf8(&[0xC0, x]);
78+
}
79+
80+
// Invalid
81+
{
82+
std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
83+
//~^ WARN calls to `std::str::from_utf8`
84+
std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
85+
//~^ WARN calls to `std::str::from_utf8`
86+
std::str::from_utf8(b"cl\x82ippy");
87+
//~^ WARN calls to `std::str::from_utf8`
88+
std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
89+
//~^ WARN calls to `std::str::from_utf8`
90+
}
91+
}
92+
4993
fn main() {}
tests/ui/lint/invalid_from_utf8.stderr

+61-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
2-
--> $DIR/invalid_from_utf8.rs:18:9
2+
--> $DIR/invalid_from_utf8.rs:19:9
33
|
44
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
55
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
@@ -13,44 +13,98 @@ LL | #![warn(invalid_from_utf8_unchecked)]
1313
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
1414

1515
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
16-
--> $DIR/invalid_from_utf8.rs:20:9
16+
--> $DIR/invalid_from_utf8.rs:21:9
1717
|
1818
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
1919
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
2020
| |
2121
| the literal was valid UTF-8 up to the 2 bytes
2222

2323
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
24-
--> $DIR/invalid_from_utf8.rs:38:9
24+
--> $DIR/invalid_from_utf8.rs:39:9
2525
|
2626
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
2727
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
2828
| |
2929
| the literal was valid UTF-8 up to the 2 bytes
3030

3131
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
32-
--> $DIR/invalid_from_utf8.rs:40:9
32+
--> $DIR/invalid_from_utf8.rs:41:9
3333
|
3434
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
3535
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
3636
| |
3737
| the literal was valid UTF-8 up to the 2 bytes
3838

3939
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
40-
--> $DIR/invalid_from_utf8.rs:42:9
40+
--> $DIR/invalid_from_utf8.rs:43:9
4141
|
4242
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
4343
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
4444
| |
4545
| the literal was valid UTF-8 up to the 2 bytes
4646

4747
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
48-
--> $DIR/invalid_from_utf8.rs:44:9
48+
--> $DIR/invalid_from_utf8.rs:45:9
4949
|
5050
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
5151
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
5252
| |
5353
| the literal was valid UTF-8 up to the 2 bytes
5454

55-
warning: 6 warnings emitted
55+
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
56+
--> $DIR/invalid_from_utf8.rs:62:9
57+
|
58+
LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
59+
| ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
60+
| |
61+
| the literal was valid UTF-8 up to the 2 bytes
62+
|
63+
note: the lint level is defined here
64+
--> $DIR/invalid_from_utf8.rs:5:9
65+
|
66+
LL | #![warn(invalid_from_utf8)]
67+
| ^^^^^^^^^^^^^^^^^
68+
69+
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
70+
--> $DIR/invalid_from_utf8.rs:64:9
71+
|
72+
LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
73+
| ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
74+
| |
75+
| the literal was valid UTF-8 up to the 2 bytes
76+
77+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
78+
--> $DIR/invalid_from_utf8.rs:82:9
79+
|
80+
LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
81+
| ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
82+
| |
83+
| the literal was valid UTF-8 up to the 2 bytes
84+
85+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
86+
--> $DIR/invalid_from_utf8.rs:84:9
87+
|
88+
LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
89+
| ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
90+
| |
91+
| the literal was valid UTF-8 up to the 2 bytes
92+
93+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
94+
--> $DIR/invalid_from_utf8.rs:86:9
95+
|
96+
LL | std::str::from_utf8(b"cl\x82ippy");
97+
| ^^^^^^^^^^^^^^^^^^^^-------------^
98+
| |
99+
| the literal was valid UTF-8 up to the 2 bytes
100+
101+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
102+
--> $DIR/invalid_from_utf8.rs:88:9
103+
|
104+
LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
105+
| ^^^^^^^^^^^^^^^^^^^^---------------------------------^
106+
| |
107+
| the literal was valid UTF-8 up to the 2 bytes
108+
109+
warning: 12 warnings emitted
56110

0 commit comments

Comments
 (0)