Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 9afe7e6

Browse files
committed Feb 19, 2024
separate messages for individual categories
1 parent 23a3d77 commit 9afe7e6

File tree

7 files changed

+78
-15
lines changed

7 files changed

+78
-15
lines changed
 

‎compiler/rustc_lint/messages.ftl

+24-2
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,31 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
241241
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
242242
243243
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
244-
[one] an uncommon Unicode codepoint
245-
*[other] uncommon Unicode codepoints
244+
[one] { $identifier_type ->
245+
[Exclusion] a Unicode codepoint whose corresponding script is no longer used
246+
[Technical] a Unicode codepoint that has specialized usage
247+
[Limited_Use] a Unicode codepoint that is in limited use
248+
[Not_NFKC] a Unicode codepoint that is not used in normalized strings
249+
*[other] an uncommon Unicode codepoint
250+
}
251+
*[other] { $identifier_type ->
252+
[Exclusion] {$codepoints_len} Unicode codepoints whose corresponding scripts are no longer used
253+
[Technical] {$codepoints_len} Unicode codepoints that have specialized usage
254+
[Limited_Use] {$codepoints_len} Unicode codepoints that are in limited use
255+
[Not_NFKC] {$codepoints_len} Unicode codepoints that are not used in normalized strings
256+
*[other] uncommon Unicode codepoints
257+
}
246258
}: {$codepoints}
259+
.note = {$codepoints_len ->
260+
[one] { $identifier_type ->
261+
[Restricted] this Unicode codepoint is included in the Unicode general security profile
262+
*[other] this character is included in the {$identifier_type} Unicode general security profile
263+
}
264+
*[other] { $identifier_type ->
265+
[Restricted] these Unicode codepoints are included in the Unicode general security profile
266+
*[other] these characters are included in the {$identifier_type} Unicode general security profile
267+
}
268+
}
247269
248270
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level
249271

‎compiler/rustc_lint/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#![feature(array_windows)]
3232
#![feature(box_patterns)]
3333
#![feature(control_flow_enum)]
34+
#![feature(extract_if)]
3435
#![feature(generic_nonzero)]
3536
#![feature(if_let_guard)]
3637
#![feature(iter_order_by)]

‎compiler/rustc_lint/src/lints.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1098,9 +1098,11 @@ pub struct IdentifierNonAsciiChar;
10981098

10991099
#[derive(LintDiagnostic)]
11001100
#[diag(lint_identifier_uncommon_codepoints)]
1101+
#[note]
11011102
pub struct IdentifierUncommonCodepoints {
11021103
pub codepoints: Vec<char>,
11031104
pub codepoints_len: usize,
1105+
pub identifier_type: &'static str,
11041106
}
11051107

11061108
#[derive(LintDiagnostic)]

‎compiler/rustc_lint/src/non_ascii_idents.rs

+39-8
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use rustc_ast as ast;
77
use rustc_data_structures::fx::FxIndexMap;
88
use rustc_data_structures::unord::UnordMap;
99
use rustc_span::symbol::Symbol;
10+
use unicode_security::general_security_profile::IdentifierType;
1011

1112
declare_lint! {
1213
/// The `non_ascii_idents` lint detects non-ASCII identifiers.
@@ -189,17 +190,47 @@ impl EarlyLintPass for NonAsciiIdents {
189190
if check_uncommon_codepoints
190191
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
191192
{
192-
let codepoints: Vec<_> = symbol_str
193+
let mut chars: Vec<_> = symbol_str
193194
.chars()
194-
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
195+
.map(|c| (c, GeneralSecurityProfile::identifier_type(c)))
195196
.collect();
196-
let codepoints_len = codepoints.len();
197197

198-
cx.emit_span_lint(
199-
UNCOMMON_CODEPOINTS,
200-
sp,
201-
IdentifierUncommonCodepoints { codepoints, codepoints_len },
202-
);
198+
for (id_ty, id_ty_descr) in [
199+
(IdentifierType::Exclusion, "Exclusion"),
200+
(IdentifierType::Technical, "Technical"),
201+
(IdentifierType::Limited_Use, "Limited_Use"),
202+
(IdentifierType::Not_NFKC, "Not_NFKC"),
203+
] {
204+
let codepoints: Vec<_> =
205+
chars.extract_if(|(_, ty)| *ty == Some(id_ty)).collect();
206+
if codepoints.is_empty() {
207+
continue;
208+
}
209+
cx.emit_span_lint(
210+
UNCOMMON_CODEPOINTS,
211+
sp,
212+
IdentifierUncommonCodepoints {
213+
codepoints_len: codepoints.len(),
214+
codepoints: codepoints.into_iter().map(|(c, _)| c).collect(),
215+
identifier_type: id_ty_descr,
216+
},
217+
);
218+
}
219+
220+
let remaining = chars
221+
.extract_if(|(c, _)| !GeneralSecurityProfile::identifier_allowed(*c))
222+
.collect::<Vec<_>>();
223+
if !remaining.is_empty() {
224+
cx.emit_span_lint(
225+
UNCOMMON_CODEPOINTS,
226+
sp,
227+
IdentifierUncommonCodepoints {
228+
codepoints_len: remaining.len(),
229+
codepoints: remaining.into_iter().map(|(c, _)| c).collect(),
230+
identifier_type: "Restricted",
231+
},
232+
);
233+
}
203234
}
204235
}
205236

‎tests/ui/lexer/lex-emoji-identifiers.stderr

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
4646
LL | let key1️⃣ = "keycap sequence";
4747
| ^^^^
4848
|
49+
= note: this Unicode codepoint is included in the Unicode general security profile
4950
= note: `#[warn(uncommon_codepoints)]` on by default
5051

5152
error: aborting due to 7 previous errors; 1 warning emitted

‎tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#![deny(uncommon_codepoints)]
22

3-
const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
3+
const µ: f64 = 0.000001; //~ ERROR identifier contains a Unicode codepoint that is not used in normalized strings: 'µ'
44
//~| WARNING should have an upper case name
55

6-
fn dijkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint
6+
fn dijkstra() {}
7+
//~^ ERROR identifier contains a Unicode codepoint that is not used in normalized strings: 'ij'
78

89
fn main() {
910
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints

‎tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,31 @@
1-
error: identifier contains an uncommon Unicode codepoint: 'µ'
1+
error: identifier contains a Unicode codepoint that is not used in normalized strings: 'µ'
22
--> $DIR/lint-uncommon-codepoints.rs:3:7
33
|
44
LL | const µ: f64 = 0.000001;
55
| ^
66
|
7+
= note: this character is included in the Not_NFKC Unicode general security profile
78
note: the lint level is defined here
89
--> $DIR/lint-uncommon-codepoints.rs:1:9
910
|
1011
LL | #![deny(uncommon_codepoints)]
1112
| ^^^^^^^^^^^^^^^^^^^
1213

13-
error: identifier contains an uncommon Unicode codepoint: 'ij'
14+
error: identifier contains a Unicode codepoint that is not used in normalized strings: 'ij'
1415
--> $DIR/lint-uncommon-codepoints.rs:6:4
1516
|
1617
LL | fn dijkstra() {}
1718
| ^^^^^^^
19+
|
20+
= note: this character is included in the Not_NFKC Unicode general security profile
1821

1922
error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
20-
--> $DIR/lint-uncommon-codepoints.rs:9:9
23+
--> $DIR/lint-uncommon-codepoints.rs:10:9
2124
|
2225
LL | let ㇻㇲㇳ = "rust";
2326
| ^^^^^^
27+
|
28+
= note: these Unicode codepoints are included in the Unicode general security profile
2429

2530
warning: constant `µ` should have an upper case name
2631
--> $DIR/lint-uncommon-codepoints.rs:3:7

0 commit comments

Comments
 (0)