Skip to content

Commit 8bccceb

Browse files
committed
separate messages for individual categories
1 parent 23a3d77 commit 8bccceb

File tree

8 files changed

+79
-19
lines changed

8 files changed

+79
-19
lines changed

compiler/rustc_lint/messages.ftl

+21-2
Original file line number | Diff line number | Diff line change
@@ -241,9 +241,28 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
241241
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
242242
243243
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
244-
[one] an uncommon Unicode codepoint
245-
*[other] uncommon Unicode codepoints
244+
[one] { $identifier_type ->
245+
[Exclusion] a character from an archaic script
246+
[Technical] a character that is for non-linguistic, specialized usage
247+
[Limited_Use] a character from a script in limited use
248+
[Not_NFKC] a non normalized (NFKC) character
249+
*[other] an uncommon character
250+
}
251+
*[other] { $identifier_type ->
252+
[Exclusion] {$codepoints_len} characters from archaic scripts
253+
[Technical] {$codepoints_len} characters that are for non-linguistic, specialized usage
254+
[Limited_Use] {$codepoints_len} characters from scripts in limited use
255+
[Not_NFKC] {$codepoints_len} non normalized (NFKC) characters
256+
*[other] uncommon characters
257+
}
246258
}: {$codepoints}
259+
.note = {$codepoints_len ->
260+
[one] this character is
261+
*[other] these characters are
262+
} included in the{$identifier_type ->
263+
[Restricted] {""}
264+
*[other] {" "}{$identifier_type}
265+
} Unicode general security profile
247266
248267
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level
249268

compiler/rustc_lint/src/lib.rs

+1
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#![feature(array_windows)]
3232
#![feature(box_patterns)]
3333
#![feature(control_flow_enum)]
34+
#![feature(extract_if)]
3435
#![feature(generic_nonzero)]
3536
#![feature(if_let_guard)]
3637
#![feature(iter_order_by)]

compiler/rustc_lint/src/lints.rs

+2
Original file line number | Diff line number | Diff line change
@@ -1098,9 +1098,11 @@ pub struct IdentifierNonAsciiChar;
10981098

10991099
#[derive(LintDiagnostic)]
11001100
#[diag(lint_identifier_uncommon_codepoints)]
1101+
#[note]
11011102
pub struct IdentifierUncommonCodepoints {
11021103
pub codepoints: Vec<char>,
11031104
pub codepoints_len: usize,
1105+
pub identifier_type: &'static str,
11041106
}
11051107

11061108
#[derive(LintDiagnostic)]

compiler/rustc_lint/src/non_ascii_idents.rs

+39-8
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ use rustc_ast as ast;
77
use rustc_data_structures::fx::FxIndexMap;
88
use rustc_data_structures::unord::UnordMap;
99
use rustc_span::symbol::Symbol;
10+
use unicode_security::general_security_profile::IdentifierType;
1011

1112
declare_lint! {
1213
/// The `non_ascii_idents` lint detects non-ASCII identifiers.
@@ -189,17 +190,47 @@ impl EarlyLintPass for NonAsciiIdents {
189190
if check_uncommon_codepoints
190191
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
191192
{
192-
let codepoints: Vec<_> = symbol_str
193+
let mut chars: Vec<_> = symbol_str
193194
.chars()
194-
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
195+
.map(|c| (c, GeneralSecurityProfile::identifier_type(c)))
195196
.collect();
196-
let codepoints_len = codepoints.len();
197197

198-
cx.emit_span_lint(
199-
UNCOMMON_CODEPOINTS,
200-
sp,
201-
IdentifierUncommonCodepoints { codepoints, codepoints_len },
202-
);
198+
for (id_ty, id_ty_descr) in [
199+
(IdentifierType::Exclusion, "Exclusion"),
200+
(IdentifierType::Technical, "Technical"),
201+
(IdentifierType::Limited_Use, "Limited_Use"),
202+
(IdentifierType::Not_NFKC, "Not_NFKC"),
203+
] {
204+
let codepoints: Vec<_> =
205+
chars.extract_if(|(_, ty)| *ty == Some(id_ty)).collect();
206+
if codepoints.is_empty() {
207+
continue;
208+
}
209+
cx.emit_span_lint(
210+
UNCOMMON_CODEPOINTS,
211+
sp,
212+
IdentifierUncommonCodepoints {
213+
codepoints_len: codepoints.len(),
214+
codepoints: codepoints.into_iter().map(|(c, _)| c).collect(),
215+
identifier_type: id_ty_descr,
216+
},
217+
);
218+
}
219+
220+
let remaining = chars
221+
.extract_if(|(c, _)| !GeneralSecurityProfile::identifier_allowed(*c))
222+
.collect::<Vec<_>>();
223+
if !remaining.is_empty() {
224+
cx.emit_span_lint(
225+
UNCOMMON_CODEPOINTS,
226+
sp,
227+
IdentifierUncommonCodepoints {
228+
codepoints_len: remaining.len(),
229+
codepoints: remaining.into_iter().map(|(c, _)| c).collect(),
230+
identifier_type: "Restricted",
231+
},
232+
);
233+
}
203234
}
204235
}
205236

tests/ui/lexer/lex-emoji-identifiers.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ fn invalid_emoji_usages() {
44
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
55
// FIXME
66
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
7-
//~^ WARN: identifier contains an uncommon Unicode codepoint
7+
//~^ WARN: identifier contains an uncommon character: '\u{fe0f}'
88
let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
99
let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
1010
let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji

tests/ui/lexer/lex-emoji-identifiers.stderr

+2-1
Original file line number | Diff line number | Diff line change
@@ -40,12 +40,13 @@ error: identifiers cannot contain emoji: `folded🙏🏿`
4040
LL | let folded🙏🏿 = "modifier sequence";
4141
| ^^^^^^^^^^
4242

43-
warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
43+
warning: identifier contains an uncommon character: '\u{fe0f}'
4444
--> $DIR/lex-emoji-identifiers.rs:6:9
4545
|
4646
LL | let key1️⃣ = "keycap sequence";
4747
| ^^^^
4848
|
49+
= note: this character is included in the Unicode general security profile
4950
= note: `#[warn(uncommon_codepoints)]` on by default
5051

5152
error: aborting due to 7 previous errors; 1 warning emitted

tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs

+4-3
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
#![deny(uncommon_codepoints)]
22

3-
const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
3+
const µ: f64 = 0.000001; //~ identifier contains a non normalized (NFKC) character: 'µ'
44
//~| WARNING should have an upper case name
55

6-
fn dijkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint
6+
fn dijkstra() {}
7+
//~^ ERROR identifier contains a non normalized (NFKC) character: 'ij'
78

89
fn main() {
9-
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
10+
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon characters: 'ㇻ', 'ㇲ', and 'ㇳ'
1011

1112
// using the same identifier the second time won't trigger the lint.
1213
println!("{}", ㇻㇲㇳ);

tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr

+9-4
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,31 @@
1-
error: identifier contains an uncommon Unicode codepoint: 'µ'
1+
error: identifier contains a non normalized (NFKC) character: 'µ'
22
--> $DIR/lint-uncommon-codepoints.rs:3:7
33
|
44
LL | const µ: f64 = 0.000001;
55
| ^
66
|
7+
= note: this character is included in the Not_NFKC Unicode general security profile
78
note: the lint level is defined here
89
--> $DIR/lint-uncommon-codepoints.rs:1:9
910
|
1011
LL | #![deny(uncommon_codepoints)]
1112
| ^^^^^^^^^^^^^^^^^^^
1213

13-
error: identifier contains an uncommon Unicode codepoint: 'ij'
14+
error: identifier contains a non normalized (NFKC) character: 'ij'
1415
--> $DIR/lint-uncommon-codepoints.rs:6:4
1516
|
1617
LL | fn dijkstra() {}
1718
| ^^^^^^^
19+
|
20+
= note: this character is included in the Not_NFKC Unicode general security profile
1821

19-
error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
20-
--> $DIR/lint-uncommon-codepoints.rs:9:9
22+
error: identifier contains uncommon characters: 'ㇻ', 'ㇲ', and 'ㇳ'
23+
--> $DIR/lint-uncommon-codepoints.rs:10:9
2124
|
2225
LL | let ㇻㇲㇳ = "rust";
2326
| ^^^^^^
27+
|
28+
= note: these characters are included in the Unicode general security profile
2429

2530
warning: constant `µ` should have an upper case name
2631
--> $DIR/lint-uncommon-codepoints.rs:3:7

0 commit comments

Comments
 (0)