1
+ use crate :: { EarlyContext , EarlyLintPass , LintContext } ;
2
+ use rustc_ast as ast;
3
+ use rustc_errors:: { Applicability , SuggestionStyle } ;
4
+ use rustc_span:: { BytePos , Span , Symbol } ;
5
+
6
+ declare_lint ! {
7
+ /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
8
+ /// visual representation of text on screen in a way that does not correspond to their on
9
+ /// memory representation.
10
+ ///
11
+ /// ### Explanation
12
+ ///
13
+ /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
14
+ /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
15
+ /// its direction on software that supports these codepoints. This makes the text "abc" display
16
+ /// as "cba" on screen. By leveraging software that supports these, people can write specially
17
+ /// crafted literals that make the surrounding code seem like it's performing one action, when
18
+ /// in reality it is performing another. Because of this, we proactively lint against their
19
+ /// presence to avoid surprises.
20
+ ///
21
+ /// ### Example
22
+ ///
23
+ /// ```rust,compile_fail
24
+ /// #![deny(text_direction_codepoint_in_literal)]
25
+ /// fn main() {
26
+ /// println!("{:?}", '');
27
+ /// }
28
+ /// ```
29
+ ///
30
+ /// {{produces}}
31
+ ///
32
+ pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL ,
33
+ Deny ,
34
+ "detect special Unicode codepoints that affect the visual representation of text on screen, \
35
+ changing the direction in which text flows",
36
+ }
37
+
38
+ declare_lint_pass ! ( HiddenUnicodeCodepoints => [ TEXT_DIRECTION_CODEPOINT_IN_LITERAL ] ) ;
39
+
40
/// Unicode bidirectional formatting codepoints that alter the direction in which text is
/// displayed. This slice can be passed directly to `str::contains` as a pattern to test
/// whether a string holds any of them.
pub(crate) const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
    '\u{2067}', // RIGHT-TO-LEFT ISOLATE
    '\u{2068}', // FIRST STRONG ISOLATE
    '\u{202C}', // POP DIRECTIONAL FORMATTING
    '\u{2069}', // POP DIRECTIONAL ISOLATE
];
44
+
45
+ impl HiddenUnicodeCodepoints {
46
+ fn lint_text_direction_codepoint (
47
+ & self ,
48
+ cx : & EarlyContext < ' _ > ,
49
+ text : Symbol ,
50
+ span : Span ,
51
+ padding : u32 ,
52
+ point_at_inner_spans : bool ,
53
+ label : & str ,
54
+ ) {
55
+ // Obtain the `Span`s for each of the forbidden chars.
56
+ let spans: Vec < _ > = text
57
+ . as_str ( )
58
+ . char_indices ( )
59
+ . filter_map ( |( i, c) | {
60
+ UNICODE_TEXT_FLOW_CHARS . contains ( & c) . then ( || {
61
+ let lo = span. lo ( ) + BytePos ( i as u32 + padding) ;
62
+ ( c, span. with_lo ( lo) . with_hi ( lo + BytePos ( c. len_utf8 ( ) as u32 ) ) )
63
+ } )
64
+ } )
65
+ . collect ( ) ;
66
+
67
+ cx. struct_span_lint ( TEXT_DIRECTION_CODEPOINT_IN_LITERAL , span, |lint| {
68
+ let mut err = lint. build ( & format ! (
69
+ "unicode codepoint changing visible direction of text present in {}" ,
70
+ label
71
+ ) ) ;
72
+ let ( an, s) = match spans. len ( ) {
73
+ 1 => ( "an " , "" ) ,
74
+ _ => ( "" , "s" ) ,
75
+ } ;
76
+ err. span_label (
77
+ span,
78
+ & format ! (
79
+ "this {} contains {}invisible unicode text flow control codepoint{}" ,
80
+ label, an, s,
81
+ ) ,
82
+ ) ;
83
+ if point_at_inner_spans {
84
+ for ( c, span) in & spans {
85
+ err. span_label ( * span, format ! ( "{:?}" , c) ) ;
86
+ }
87
+ }
88
+ err. note (
89
+ "these kind of unicode codepoints change the way text flows on applications that \
90
+ support them, but can cause confusion because they change the order of \
91
+ characters on the screen",
92
+ ) ;
93
+ if point_at_inner_spans && !spans. is_empty ( ) {
94
+ err. multipart_suggestion_with_style (
95
+ "if their presence wasn't intentional, you can remove them" ,
96
+ spans. iter ( ) . map ( |( _, span) | ( * span, "" . to_string ( ) ) ) . collect ( ) ,
97
+ Applicability :: MachineApplicable ,
98
+ SuggestionStyle :: HideCodeAlways ,
99
+ ) ;
100
+ err. multipart_suggestion (
101
+ "if you want to keep them but make them visible in your source code, you can \
102
+ escape them",
103
+ spans
104
+ . into_iter ( )
105
+ . map ( |( c, span) | {
106
+ let c = format ! ( "{:?}" , c) ;
107
+ ( span, c[ 1 ..c. len ( ) - 1 ] . to_string ( ) )
108
+ } )
109
+ . collect ( ) ,
110
+ Applicability :: MachineApplicable ,
111
+ ) ;
112
+ } else {
113
+ // FIXME: in other suggestions we've reversed the inner spans of doc comments. We
114
+ // should do the same here to provide the same good suggestions as we do for
115
+ // literals above.
116
+ err. note ( "if their presence wasn't intentional, you can remove them" ) ;
117
+ err. note ( & format ! (
118
+ "if you want to keep them but make them visible in your source code, you can \
119
+ escape them: {}",
120
+ spans
121
+ . into_iter( )
122
+ . map( |( c, _) | { format!( "{:?}" , c) } )
123
+ . collect:: <Vec <String >>( )
124
+ . join( ", " ) ,
125
+ ) ) ;
126
+ }
127
+ err. emit ( ) ;
128
+ } ) ;
129
+ }
130
+ }
131
+ impl EarlyLintPass for HiddenUnicodeCodepoints {
132
+ fn check_attribute ( & mut self , cx : & EarlyContext < ' _ > , attr : & ast:: Attribute ) {
133
+ if let ast:: AttrKind :: DocComment ( _, comment) = attr. kind {
134
+ if comment. as_str ( ) . contains ( UNICODE_TEXT_FLOW_CHARS ) {
135
+ self . lint_text_direction_codepoint ( cx, comment, attr. span , 0 , false , "doc comment" ) ;
136
+ }
137
+ }
138
+ }
139
+
140
+ fn check_expr ( & mut self , cx : & EarlyContext < ' _ > , expr : & ast:: Expr ) {
141
+ // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
142
+ let ( text, span, padding) = match & expr. kind {
143
+ ast:: ExprKind :: Lit ( ast:: Lit { token, kind, span } ) => {
144
+ let text = token. symbol ;
145
+ if !text. as_str ( ) . contains ( UNICODE_TEXT_FLOW_CHARS ) {
146
+ return ;
147
+ }
148
+ let padding = match kind {
149
+ // account for `"` or `'`
150
+ ast:: LitKind :: Str ( _, ast:: StrStyle :: Cooked ) | ast:: LitKind :: Char ( _) => 1 ,
151
+ // account for `r###"`
152
+ ast:: LitKind :: Str ( _, ast:: StrStyle :: Raw ( val) ) => * val as u32 + 2 ,
153
+ _ => return ,
154
+ } ;
155
+ ( text, span, padding)
156
+ }
157
+ _ => return ,
158
+ } ;
159
+ self . lint_text_direction_codepoint ( cx, text, * span, padding, true , "literal" ) ;
160
+ }
161
+ }
0 commit comments