Skip to content

Commit ef24faf

Browse files
committed
Refactor non_ascii_idents lints, exclude ascii pair for confusable_idents lint.
1 parent 1a4e2b6 commit ef24faf

10 files changed

+131
-157
lines changed

src/librustc_lint/non_ascii_idents.rs

+104-131
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
use crate::{EarlyContext, EarlyLintPass, LintContext};
22
use rustc_ast::ast;
33
use rustc_data_structures::fx::FxHashMap;
4-
use rustc_span::symbol::{Ident, SymbolStr};
5-
use std::hash::{Hash, Hasher};
6-
use std::ops::Deref;
4+
use rustc_span::symbol::SymbolStr;
75

86
declare_lint! {
97
pub NON_ASCII_IDENTS,
@@ -19,158 +17,133 @@ declare_lint! {
1917
crate_level_only
2018
}
2119

22-
// FIXME: Change this to warn.
2320
declare_lint! {
2421
pub CONFUSABLE_IDENTS,
25-
Allow,
22+
Warn,
2623
"detects visually confusable pairs between identifiers",
2724
crate_level_only
2825
}
2926

3027
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
3128

32-
enum CowBoxSymStr {
33-
Interned(SymbolStr),
34-
Owned(Box<str>),
35-
}
36-
37-
impl Deref for CowBoxSymStr {
38-
type Target = str;
39-
40-
fn deref(&self) -> &str {
41-
match self {
42-
CowBoxSymStr::Interned(interned) => interned,
43-
CowBoxSymStr::Owned(ref owned) => owned,
44-
}
45-
}
46-
}
47-
48-
impl Hash for CowBoxSymStr {
49-
#[inline]
50-
fn hash<H: Hasher>(&self, state: &mut H) {
51-
Hash::hash(&**self, state)
52-
}
53-
}
54-
55-
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
56-
#[inline]
57-
fn eq(&self, other: &CowBoxSymStr) -> bool {
58-
PartialEq::eq(&**self, &**other)
59-
}
60-
}
61-
62-
impl Eq for CowBoxSymStr {}
63-
64-
fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
65-
use std::mem::swap;
66-
use unicode_security::confusable_detection::skeleton;
67-
buffer.clear();
68-
buffer.extend(skeleton(&symbol_str));
69-
if symbol_str == *buffer {
70-
CowBoxSymStr::Interned(symbol_str)
71-
} else {
72-
let mut owned = String::new();
73-
swap(buffer, &mut owned);
74-
CowBoxSymStr::Owned(owned.into_boxed_str())
75-
}
76-
}
77-
78-
fn is_in_ascii_confusable_closure(c: char) -> bool {
79-
// FIXME: move this table to `unicode_security` crate.
80-
// data here corresponds to Unicode 13.
81-
const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
82-
let c = c as u64;
83-
for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
84-
if c >= range_start && c <= range_end {
85-
return true;
86-
}
87-
}
88-
false
89-
}
90-
91-
fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
92-
// FIXME: move this table to `unicode_security` crate.
93-
// data here corresponds to Unicode 13.
94-
const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
95-
0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
96-
0x2080,
97-
];
98-
let c = c as u64;
99-
for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
100-
if c == item {
101-
return true;
102-
}
103-
}
104-
false
105-
}
106-
10729
impl EarlyLintPass for NonAsciiIdents {
10830
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
10931
use rustc_session::lint::Level;
110-
if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
32+
use rustc_span::Span;
33+
use unicode_security::GeneralSecurityProfile;
34+
use utils::CowBoxSymStr;
35+
36+
let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
37+
let check_uncommon_codepoints =
38+
cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
39+
let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;
40+
41+
if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
11142
return;
11243
}
44+
45+
let mut has_non_ascii_idents = false;
11346
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
114-
let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
115-
let mut in_fast_path = true;
116-
for (symbol, sp) in symbols.iter() {
117-
// fast path
47+
for (symbol, &sp) in symbols.iter() {
11848
let symbol_str = symbol.as_str();
119-
if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
120-
// fallback to slow path.
121-
symbol_strs_and_spans.clear();
122-
in_fast_path = false;
123-
break;
49+
if symbol_str.is_ascii() {
50+
continue;
12451
}
125-
if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
126-
symbol_strs_and_spans.push((symbol_str, *sp));
52+
has_non_ascii_idents = true;
53+
cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| {
54+
lint.build("identifier contains non-ASCII characters").emit()
55+
});
56+
if check_uncommon_codepoints
57+
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
58+
{
59+
cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| {
60+
lint.build("identifier contains uncommon Unicode codepoints").emit()
61+
})
12762
}
12863
}
129-
if !in_fast_path {
130-
// slow path
131-
for (symbol, sp) in symbols.iter() {
64+
65+
if has_non_ascii_idents && check_confusable_idents {
66+
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
67+
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
68+
let mut str_buf = String::new();
69+
for (symbol, &sp) in symbols.iter() {
70+
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
71+
use std::mem::replace;
72+
use unicode_security::confusable_detection::skeleton;
73+
buffer.clear();
74+
buffer.extend(skeleton(symbol_str));
75+
if *symbol_str == *buffer {
76+
CowBoxSymStr::Interned(symbol_str.clone())
77+
} else {
78+
let owned = replace(buffer, String::new());
79+
CowBoxSymStr::Owned(owned.into_boxed_str())
80+
}
81+
}
13282
let symbol_str = symbol.as_str();
133-
symbol_strs_and_spans.push((symbol_str, *sp));
83+
let is_ascii = symbol_str.is_ascii();
84+
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
85+
skeleton_map
86+
.entry(skeleton)
87+
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
88+
if !*existing_is_ascii || !is_ascii {
89+
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
90+
lint.build(&format!(
91+
"identifier pair considered confusable between `{}` and `{}`",
92+
existing_symbolstr, symbol_str
93+
))
94+
.span_label(
95+
*existing_span,
96+
"this is where the previous identifier occurred",
97+
)
98+
.emit();
99+
});
100+
}
101+
if *existing_is_ascii && !is_ascii {
102+
*existing_symbolstr = symbol_str.clone();
103+
*existing_span = sp;
104+
*existing_is_ascii = is_ascii;
105+
}
106+
})
107+
.or_insert((symbol_str, sp, is_ascii));
134108
}
135109
}
136-
drop(symbols);
137-
symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
138-
let mut skeleton_map =
139-
FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
140-
let mut str_buf = String::new();
141-
for (symbol_str, sp) in symbol_strs_and_spans {
142-
let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
143-
skeleton_map
144-
.entry(skeleton)
145-
.and_modify(|(existing_symbolstr, existing_span)| {
146-
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
147-
lint.build(&format!(
148-
"identifier pair considered confusable between `{}` and `{}`",
149-
existing_symbolstr, symbol_str
150-
))
151-
.span_label(
152-
*existing_span,
153-
"this is where the previous identifier occurred",
154-
)
155-
.emit();
156-
});
157-
})
158-
.or_insert((symbol_str, sp));
110+
}
111+
}
112+
113+
mod utils {
114+
use rustc_span::symbol::SymbolStr;
115+
use std::hash::{Hash, Hasher};
116+
use std::ops::Deref;
117+
118+
pub(super) enum CowBoxSymStr {
119+
Interned(SymbolStr),
120+
Owned(Box<str>),
121+
}
122+
123+
impl Deref for CowBoxSymStr {
124+
type Target = str;
125+
126+
fn deref(&self) -> &str {
127+
match self {
128+
CowBoxSymStr::Interned(interned) => interned,
129+
CowBoxSymStr::Owned(ref owned) => owned,
130+
}
159131
}
160132
}
161-
fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
162-
use unicode_security::GeneralSecurityProfile;
163-
let name_str = ident.name.as_str();
164-
if name_str.is_ascii() {
165-
return;
133+
134+
impl Hash for CowBoxSymStr {
135+
#[inline]
136+
fn hash<H: Hasher>(&self, state: &mut H) {
137+
Hash::hash(&**self, state)
166138
}
167-
cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
168-
lint.build("identifier contains non-ASCII characters").emit()
169-
});
170-
if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
171-
cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
172-
lint.build("identifier contains uncommon Unicode codepoints").emit()
173-
})
139+
}
140+
141+
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
142+
#[inline]
143+
fn eq(&self, other: &CowBoxSymStr) -> bool {
144+
PartialEq::eq(&**self, &**other)
174145
}
175146
}
147+
148+
impl Eq for CowBoxSymStr {}
176149
}

src/librustc_session/parse.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use rustc_span::hygiene::ExpnId;
1313
use rustc_span::source_map::{FilePathMapping, SourceMap};
1414
use rustc_span::{MultiSpan, Span, Symbol};
1515

16+
use std::collections::BTreeMap;
1617
use std::path::PathBuf;
1718
use std::str;
1819

@@ -63,7 +64,7 @@ impl GatedSpans {
6364
#[derive(Default)]
6465
pub struct SymbolGallery {
6566
/// All symbols that occurred and the span of their first occurrence.
66-
pub symbols: Lock<FxHashMap<Symbol, Span>>,
67+
pub symbols: Lock<BTreeMap<Symbol, Span>>,
6768
}
6869

6970
impl SymbolGallery {

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,14 @@
22
#![deny(confusable_idents)]
33
#![allow(uncommon_codepoints, non_upper_case_globals)]
44

5-
const: usize = 42; //~ ERROR identifier pair considered confusable
5+
const: usize = 42;
66

77
fn main() {
8-
let s = "rust";
8+
let s = "rust"; //~ ERROR identifier pair considered confusable
9+
not_affected();
10+
}
11+
12+
fn not_affected() {
13+
let s1 = 1;
14+
let sl = 'l';
915
}

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
error: identifier pair considered confusable between `s` and ``
2-
--> $DIR/lint-confusable-idents.rs:5:7
1+
error: identifier pair considered confusable between `` and `s`
2+
--> $DIR/lint-confusable-idents.rs:8:9
33
|
44
LL | const s: usize = 42;
5-
| ^^
5+
| -- this is where the previous identifier occurred
66
...
77
LL | let s = "rust";
8-
| - this is where the previous identifier occurred
8+
| ^
99
|
1010
note: the lint level is defined here
1111
--> $DIR/lint-confusable-idents.rs:2:9

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,7 @@ fn coöperation() {} //~ ERROR identifier contains non-ASCII characters
77

88
fn main() {
99
let naïveté = 2; //~ ERROR identifier contains non-ASCII characters
10-
println!("{}", naïveté); //~ ERROR identifier contains non-ASCII characters
10+
11+
// Using the same identifier a second time won't trigger the lint again; it is only reported at its first occurrence.
12+
println!("{}", naïveté);
1113
}

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr

+1-7
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,5 @@ error: identifier contains non-ASCII characters
2222
LL | let naïveté = 2;
2323
| ^^^^^^^
2424

25-
error: identifier contains non-ASCII characters
26-
--> $DIR/lint-non-ascii-idents.rs:10:20
27-
|
28-
LL | println!("{}", naïveté);
29-
| ^^^^^^^
30-
31-
error: aborting due to 4 previous errors
25+
error: aborting due to 3 previous errors
3226

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,7 @@ fn dijkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints
77

88
fn main() {
99
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
10-
println!("{}", ㇻㇲㇳ); //~ ERROR identifier contains uncommon Unicode codepoints
10+
11+
// Using the same identifier a second time won't trigger the lint again; it is only reported at its first occurrence.
12+
println!("{}", ㇻㇲㇳ);
1113
}

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr

+1-7
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,5 @@ error: identifier contains uncommon Unicode codepoints
2222
LL | let ㇻㇲㇳ = "rust";
2323
| ^^^^^^
2424

25-
error: identifier contains uncommon Unicode codepoints
26-
--> $DIR/lint-uncommon-codepoints.rs:10:20
27-
|
28-
LL | println!("{}", ㇻㇲㇳ);
29-
| ^^^^^^
30-
31-
error: aborting due to 4 previous errors
25+
error: aborting due to 3 previous errors
3226

src/test/ui/parser/issue-62524.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
// ignore-tidy-trailing-newlines
22
// error-pattern: aborting due to 3 previous errors
3+
#![allow(uncommon_codepoints)]
4+
35
y![
46
Ϥ,

src/test/ui/parser/issue-62524.stderr

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
error: this file contains an unclosed delimiter
2-
--> $DIR/issue-62524.rs:4:3
2+
--> $DIR/issue-62524.rs:6:3
33
|
44
LL | y![
55
| - unclosed delimiter
66
LL | Ϥ,
77
| ^
88

99
error: macros that expand to items must be delimited with braces or followed by a semicolon
10-
--> $DIR/issue-62524.rs:3:3
10+
--> $DIR/issue-62524.rs:5:3
1111
|
1212
LL | y![
1313
| ___^
@@ -24,7 +24,7 @@ LL | Ϥ,;
2424
| ^
2525

2626
error: cannot find macro `y` in this scope
27-
--> $DIR/issue-62524.rs:3:1
27+
--> $DIR/issue-62524.rs:5:1
2828
|
2929
LL | y![
3030
| ^

0 commit comments

Comments
 (0)