1
1
use crate :: { EarlyContext , EarlyLintPass , LintContext } ;
2
2
use rustc_ast:: ast;
3
3
use rustc_data_structures:: fx:: FxHashMap ;
4
- use rustc_span:: symbol:: { Ident , SymbolStr } ;
5
- use std:: hash:: { Hash , Hasher } ;
6
- use std:: ops:: Deref ;
4
+ use rustc_span:: symbol:: SymbolStr ;
7
5
8
6
declare_lint ! {
9
7
pub NON_ASCII_IDENTS ,
@@ -19,158 +17,133 @@ declare_lint! {
19
17
crate_level_only
20
18
}
21
19
22
- // FIXME: Change this to warn.
23
20
declare_lint ! {
24
21
pub CONFUSABLE_IDENTS ,
25
- Allow ,
22
+ Warn ,
26
23
"detects visually confusable pairs between identifiers" ,
27
24
crate_level_only
28
25
}
29
26
30
27
declare_lint_pass ! ( NonAsciiIdents => [ NON_ASCII_IDENTS , UNCOMMON_CODEPOINTS , CONFUSABLE_IDENTS ] ) ;
31
28
32
- enum CowBoxSymStr {
33
- Interned ( SymbolStr ) ,
34
- Owned ( Box < str > ) ,
35
- }
36
-
37
- impl Deref for CowBoxSymStr {
38
- type Target = str ;
39
-
40
- fn deref ( & self ) -> & str {
41
- match self {
42
- CowBoxSymStr :: Interned ( interned) => interned,
43
- CowBoxSymStr :: Owned ( ref owned) => owned,
44
- }
45
- }
46
- }
47
-
48
- impl Hash for CowBoxSymStr {
49
- #[ inline]
50
- fn hash < H : Hasher > ( & self , state : & mut H ) {
51
- Hash :: hash ( & * * self , state)
52
- }
53
- }
54
-
55
- impl PartialEq < CowBoxSymStr > for CowBoxSymStr {
56
- #[ inline]
57
- fn eq ( & self , other : & CowBoxSymStr ) -> bool {
58
- PartialEq :: eq ( & * * self , & * * other)
59
- }
60
- }
61
-
62
- impl Eq for CowBoxSymStr { }
63
-
64
- fn calc_skeleton ( symbol_str : SymbolStr , buffer : & ' _ mut String ) -> CowBoxSymStr {
65
- use std:: mem:: swap;
66
- use unicode_security:: confusable_detection:: skeleton;
67
- buffer. clear ( ) ;
68
- buffer. extend ( skeleton ( & symbol_str) ) ;
69
- if symbol_str == * buffer {
70
- CowBoxSymStr :: Interned ( symbol_str)
71
- } else {
72
- let mut owned = String :: new ( ) ;
73
- swap ( buffer, & mut owned) ;
74
- CowBoxSymStr :: Owned ( owned. into_boxed_str ( ) )
75
- }
76
- }
77
-
78
- fn is_in_ascii_confusable_closure ( c : char ) -> bool {
79
- // FIXME: move this table to `unicode_security` crate.
80
- // data here corresponds to Unicode 13.
81
- const ASCII_CONFUSABLE_CLOSURE : & [ ( u64 , u64 ) ] = & [ ( 0x00 , 0x7f ) , ( 0xba , 0xba ) , ( 0x2080 , 0x2080 ) ] ;
82
- let c = c as u64 ;
83
- for & ( range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
84
- if c >= range_start && c <= range_end {
85
- return true ;
86
- }
87
- }
88
- false
89
- }
90
-
91
- fn is_in_ascii_confusable_closure_relevant_list ( c : char ) -> bool {
92
- // FIXME: move this table to `unicode_security` crate.
93
- // data here corresponds to Unicode 13.
94
- const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST : & [ u64 ] = & [
95
- 0x22 , 0x25 , 0x27 , 0x2f , 0x30 , 0x31 , 0x49 , 0x4f , 0x60 , 0x6c , 0x6d , 0x6e , 0x72 , 0x7c , 0xba ,
96
- 0x2080 ,
97
- ] ;
98
- let c = c as u64 ;
99
- for & item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
100
- if c == item {
101
- return true ;
102
- }
103
- }
104
- false
105
- }
106
-
107
29
impl EarlyLintPass for NonAsciiIdents {
108
30
fn check_crate ( & mut self , cx : & EarlyContext < ' _ > , _: & ast:: Crate ) {
109
31
use rustc_session:: lint:: Level ;
110
- if cx. builder . lint_level ( CONFUSABLE_IDENTS ) . 0 == Level :: Allow {
32
+ use rustc_span:: Span ;
33
+ use unicode_security:: GeneralSecurityProfile ;
34
+ use utils:: CowBoxSymStr ;
35
+
36
+ let check_non_ascii_idents = cx. builder . lint_level ( NON_ASCII_IDENTS ) . 0 != Level :: Allow ;
37
+ let check_uncommon_codepoints =
38
+ cx. builder . lint_level ( UNCOMMON_CODEPOINTS ) . 0 != Level :: Allow ;
39
+ let check_confusable_idents = cx. builder . lint_level ( CONFUSABLE_IDENTS ) . 0 != Level :: Allow ;
40
+
41
+ if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
111
42
return ;
112
43
}
44
+
45
+ let mut has_non_ascii_idents = false ;
113
46
let symbols = cx. sess . parse_sess . symbol_gallery . symbols . lock ( ) ;
114
- let mut symbol_strs_and_spans = Vec :: with_capacity ( symbols. len ( ) ) ;
115
- let mut in_fast_path = true ;
116
- for ( symbol, sp) in symbols. iter ( ) {
117
- // fast path
47
+ for ( symbol, & sp) in symbols. iter ( ) {
118
48
let symbol_str = symbol. as_str ( ) ;
119
- if !symbol_str. chars ( ) . all ( is_in_ascii_confusable_closure) {
120
- // fallback to slow path.
121
- symbol_strs_and_spans. clear ( ) ;
122
- in_fast_path = false ;
123
- break ;
49
+ if symbol_str. is_ascii ( ) {
50
+ continue ;
124
51
}
125
- if symbol_str. chars ( ) . any ( is_in_ascii_confusable_closure_relevant_list) {
126
- symbol_strs_and_spans. push ( ( symbol_str, * sp) ) ;
52
+ has_non_ascii_idents = true ;
53
+ cx. struct_span_lint ( NON_ASCII_IDENTS , sp, |lint| {
54
+ lint. build ( "identifier contains non-ASCII characters" ) . emit ( )
55
+ } ) ;
56
+ if check_uncommon_codepoints
57
+ && !symbol_str. chars ( ) . all ( GeneralSecurityProfile :: identifier_allowed)
58
+ {
59
+ cx. struct_span_lint ( UNCOMMON_CODEPOINTS , sp, |lint| {
60
+ lint. build ( "identifier contains uncommon Unicode codepoints" ) . emit ( )
61
+ } )
127
62
}
128
63
}
129
- if !in_fast_path {
130
- // slow path
131
- for ( symbol, sp) in symbols. iter ( ) {
64
+
65
+ if has_non_ascii_idents && check_confusable_idents {
66
+ let mut skeleton_map: FxHashMap < CowBoxSymStr , ( SymbolStr , Span , bool ) > =
67
+ FxHashMap :: with_capacity_and_hasher ( symbols. len ( ) , Default :: default ( ) ) ;
68
+ let mut str_buf = String :: new ( ) ;
69
+ for ( symbol, & sp) in symbols. iter ( ) {
70
+ fn calc_skeleton ( symbol_str : & SymbolStr , buffer : & mut String ) -> CowBoxSymStr {
71
+ use std:: mem:: replace;
72
+ use unicode_security:: confusable_detection:: skeleton;
73
+ buffer. clear ( ) ;
74
+ buffer. extend ( skeleton ( symbol_str) ) ;
75
+ if * symbol_str == * buffer {
76
+ CowBoxSymStr :: Interned ( symbol_str. clone ( ) )
77
+ } else {
78
+ let owned = replace ( buffer, String :: new ( ) ) ;
79
+ CowBoxSymStr :: Owned ( owned. into_boxed_str ( ) )
80
+ }
81
+ }
132
82
let symbol_str = symbol. as_str ( ) ;
133
- symbol_strs_and_spans. push ( ( symbol_str, * sp) ) ;
83
+ let is_ascii = symbol_str. is_ascii ( ) ;
84
+ let skeleton = calc_skeleton ( & symbol_str, & mut str_buf) ;
85
+ skeleton_map
86
+ . entry ( skeleton)
87
+ . and_modify ( |( existing_symbolstr, existing_span, existing_is_ascii) | {
88
+ if !* existing_is_ascii || !is_ascii {
89
+ cx. struct_span_lint ( CONFUSABLE_IDENTS , sp, |lint| {
90
+ lint. build ( & format ! (
91
+ "identifier pair considered confusable between `{}` and `{}`" ,
92
+ existing_symbolstr, symbol_str
93
+ ) )
94
+ . span_label (
95
+ * existing_span,
96
+ "this is where the previous identifier occurred" ,
97
+ )
98
+ . emit ( ) ;
99
+ } ) ;
100
+ }
101
+ if * existing_is_ascii && !is_ascii {
102
+ * existing_symbolstr = symbol_str. clone ( ) ;
103
+ * existing_span = sp;
104
+ * existing_is_ascii = is_ascii;
105
+ }
106
+ } )
107
+ . or_insert ( ( symbol_str, sp, is_ascii) ) ;
134
108
}
135
109
}
136
- drop ( symbols) ;
137
- symbol_strs_and_spans. sort_by_key ( |x| x. 0 . clone ( ) ) ;
138
- let mut skeleton_map =
139
- FxHashMap :: with_capacity_and_hasher ( symbol_strs_and_spans. len ( ) , Default :: default ( ) ) ;
140
- let mut str_buf = String :: new ( ) ;
141
- for ( symbol_str, sp) in symbol_strs_and_spans {
142
- let skeleton = calc_skeleton ( symbol_str. clone ( ) , & mut str_buf) ;
143
- skeleton_map
144
- . entry ( skeleton)
145
- . and_modify ( |( existing_symbolstr, existing_span) | {
146
- cx. struct_span_lint ( CONFUSABLE_IDENTS , sp, |lint| {
147
- lint. build ( & format ! (
148
- "identifier pair considered confusable between `{}` and `{}`" ,
149
- existing_symbolstr, symbol_str
150
- ) )
151
- . span_label (
152
- * existing_span,
153
- "this is where the previous identifier occurred" ,
154
- )
155
- . emit ( ) ;
156
- } ) ;
157
- } )
158
- . or_insert ( ( symbol_str, sp) ) ;
110
+ }
111
+ }
112
+
113
+ mod utils {
114
+ use rustc_span:: symbol:: SymbolStr ;
115
+ use std:: hash:: { Hash , Hasher } ;
116
+ use std:: ops:: Deref ;
117
+
118
+ pub ( super ) enum CowBoxSymStr {
119
+ Interned ( SymbolStr ) ,
120
+ Owned ( Box < str > ) ,
121
+ }
122
+
123
+ impl Deref for CowBoxSymStr {
124
+ type Target = str ;
125
+
126
+ fn deref ( & self ) -> & str {
127
+ match self {
128
+ CowBoxSymStr :: Interned ( interned) => interned,
129
+ CowBoxSymStr :: Owned ( ref owned) => owned,
130
+ }
159
131
}
160
132
}
161
- fn check_ident ( & mut self , cx : & EarlyContext < ' _ > , ident : Ident ) {
162
- use unicode_security :: GeneralSecurityProfile ;
163
- let name_str = ident . name . as_str ( ) ;
164
- if name_str . is_ascii ( ) {
165
- return ;
133
+
134
+ impl Hash for CowBoxSymStr {
135
+ # [ inline ]
136
+ fn hash < H : Hasher > ( & self , state : & mut H ) {
137
+ Hash :: hash ( & * * self , state )
166
138
}
167
- cx. struct_span_lint ( NON_ASCII_IDENTS , ident. span , |lint| {
168
- lint. build ( "identifier contains non-ASCII characters" ) . emit ( )
169
- } ) ;
170
- if !name_str. chars ( ) . all ( GeneralSecurityProfile :: identifier_allowed) {
171
- cx. struct_span_lint ( UNCOMMON_CODEPOINTS , ident. span , |lint| {
172
- lint. build ( "identifier contains uncommon Unicode codepoints" ) . emit ( )
173
- } )
139
+ }
140
+
141
+ impl PartialEq < CowBoxSymStr > for CowBoxSymStr {
142
+ #[ inline]
143
+ fn eq ( & self , other : & CowBoxSymStr ) -> bool {
144
+ PartialEq :: eq ( & * * self , & * * other)
174
145
}
175
146
}
147
+
148
+ impl Eq for CowBoxSymStr { }
176
149
}
0 commit comments