1
- // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
1
+ // Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
2
2
// file at the top-level directory of this distribution and at
3
3
// http://rust-lang.org/COPYRIGHT.
4
4
//
9
9
// except according to those terms.
10
10
11
11
// Characters and their corresponding confusables were collected from
12
- // http://www.unicode.org/Public/security/revision-06 /confusables.txt
12
+ // http://www.unicode.org/Public/security/10.0.0/confusables.txt
13
13
14
14
use syntax_pos:: { Span , NO_EXPANSION } ;
15
15
use errors:: DiagnosticBuilder ;
16
16
use super :: StringReader ;
17
17
18
18
const UNICODE_ARRAY : & ' static [ ( char , & ' static str , char ) ] = & [
19
- ( ' ' , "No-Break Space" , ' ' ) ,
20
- ( ' ' , "Ogham Space Mark" , ' ' ) ,
19
+ ( '\u{2028}' , "Line Separator" , ' ' ) ,
20
+ ( '\u{2029}' , "Paragraph Separator" , ' ' ) ,
21
+ ( ' ' , "Ogham Space mark" , ' ' ) ,
21
22
( ' ' , "En Quad" , ' ' ) ,
22
23
( ' ' , "Em Quad" , ' ' ) ,
23
24
( ' ' , "En Space" , ' ' ) ,
24
25
( ' ' , "Em Space" , ' ' ) ,
25
26
( ' ' , "Three-Per-Em Space" , ' ' ) ,
26
27
( ' ' , "Four-Per-Em Space" , ' ' ) ,
27
28
( ' ' , "Six-Per-Em Space" , ' ' ) ,
28
- ( ' ' , "Figure Space" , ' ' ) ,
29
29
( ' ' , "Punctuation Space" , ' ' ) ,
30
30
( ' ' , "Thin Space" , ' ' ) ,
31
31
( ' ' , "Hair Space" , ' ' ) ,
32
- ( ' ' , "Narrow No-Break Space" , ' ' ) ,
33
32
( ' ' , "Medium Mathematical Space" , ' ' ) ,
33
+ ( ' ' , "No-Break Space" , ' ' ) ,
34
+ ( ' ' , "Figure Space" , ' ' ) ,
35
+ ( ' ' , "Narrow No-Break Space" , ' ' ) ,
34
36
( ' ' , "Ideographic Space" , ' ' ) ,
37
+
35
38
( 'ߺ' , "Nko Lajanyalan" , '_' ) ,
36
39
( '﹍' , "Dashed Low Line" , '_' ) ,
37
40
( '﹎' , "Centreline Low Line" , '_' ) ,
38
41
( '﹏' , "Wavy Low Line" , '_' ) ,
42
+ ( '＿' , "Fullwidth Low Line" , '_' ) ,
43
+
39
44
( '‐' , "Hyphen" , '-' ) ,
40
45
( '‑' , "Non-Breaking Hyphen" , '-' ) ,
41
46
( '‒' , "Figure Dash" , '-' ) ,
42
47
( '–' , "En Dash" , '-' ) ,
43
48
( '—' , "Em Dash" , '-' ) ,
44
49
( '﹘' , "Small Em Dash" , '-' ) ,
50
+ ( '۔' , "Arabic Full Stop" , '-' ) ,
45
51
( '⁃' , "Hyphen Bullet" , '-' ) ,
46
52
( '˗' , "Modifier Letter Minus Sign" , '-' ) ,
47
53
( '−' , "Minus Sign" , '-' ) ,
54
+ ( '➖' , "Heavy Minus Sign" , '-' ) ,
55
+ ( 'Ⲻ' , "Coptic Letter Dialect-P Ni" , '-' ) ,
48
56
( 'ー' , "Katakana-Hiragana Prolonged Sound Mark" , '-' ) ,
57
+ ( '－' , "Fullwidth Hyphen-Minus" , '-' ) ,
58
+ ( '―' , "Horizontal Bar" , '-' ) ,
59
+ ( '─' , "Box Drawings Light Horizontal" , '-' ) ,
60
+ ( '━' , "Box Drawings Heavy Horizontal" , '-' ) ,
61
+ ( '㇐' , "CJK Stroke H" , '-' ) ,
62
+ ( 'ꟷ' , "Latin Epigraphic Letter Sideways I" , '-' ) ,
63
+ ( 'ᅳ' , "Hangul Jungseong Eu" , '-' ) ,
64
+ ( 'ㅡ' , "Hangul Letter Eu" , '-' ) ,
65
+ ( '一' , "CJK Unified Ideograph-4E00" , '-' ) ,
66
+ ( '⼀' , "Kangxi Radical One" , '-' ) ,
67
+
68
+ ( '؍' , "Arabic Date Separator" , ',' ) ,
49
69
( '٫' , "Arabic Decimal Separator" , ',' ) ,
50
70
( '‚' , "Single Low-9 Quotation Mark" , ',' ) ,
71
+ ( '¸' , "Cedilla" , ',' ) ,
51
72
( 'ꓹ' , "Lisu Letter Tone Na Po" , ',' ) ,
52
73
( '，' , "Fullwidth Comma" , ',' ) ,
74
+
53
75
( '\u{37e}' , "Greek Question Mark" , ';' ) ,
54
76
( '；' , "Fullwidth Semicolon" , ';' ) ,
77
+ ( '︔' , "Presentation Form For Vertical Semicolon" , ';' ) ,
78
+
55
79
( 'ः' , "Devanagari Sign Visarga" , ':' ) ,
56
80
( 'ઃ' , "Gujarati Sign Visarga" , ':' ) ,
57
81
( '：' , "Fullwidth Colon" , ':' ) ,
58
82
( '։' , "Armenian Full Stop" , ':' ) ,
59
83
( '܃' , "Syriac Supralinear Colon" , ':' ) ,
60
84
( '܄' , "Syriac Sublinear Colon" , ':' ) ,
85
+ ( '᛬' , "Runic Multiple Punctuation" , ':' ) ,
61
86
( '︰' , "Presentation Form For Vertical Two Dot Leader" , ':' ) ,
62
87
( '᠃' , "Mongolian Full Stop" , ':' ) ,
63
88
( '᠉' , "Mongolian Manchu Full Stop" , ':' ) ,
@@ -68,25 +93,48 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
68
93
( '∶' , "Ratio" , ':' ) ,
69
94
( 'ː' , "Modifier Letter Triangular Colon" , ':' ) ,
70
95
( 'ꓽ' , "Lisu Letter Tone Mya Jeu" , ':' ) ,
96
+ ( '︓' , "Presentation Form For Vertical Colon" , ':' ) ,
97
+
71
98
( '！' , "Fullwidth Exclamation Mark" , '!' ) ,
72
99
( 'ǃ' , "Latin Letter Retroflex Click" , '!' ) ,
100
+ ( 'ⵑ' , "Tifinagh Letter Tuareg Yang" , '!' ) ,
101
+ ( '︕' , "Presentation Form For Vertical Exclamation Mark" , '!' ) ,
102
+
73
103
( 'ʔ' , "Latin Letter Glottal Stop" , '?' ) ,
104
+ ( 'Ɂ' , "Latin Capital Letter Glottal Stop" , '?' ) ,
74
105
( 'ॽ' , "Devanagari Letter Glottal Stop" , '?' ) ,
75
106
( 'Ꭾ' , "Cherokee Letter He" , '?' ) ,
107
+ ( 'ꛫ' , "Bamum Letter Ntuu" , '?' ) ,
76
108
( '？' , "Fullwidth Question Mark" , '?' ) ,
109
+ ( '︖' , "Presentation Form For Vertical Question Mark" , '?' ) ,
110
+
77
111
( '𝅭' , "Musical Symbol Combining Augmentation Dot" , '.' ) ,
78
112
( '․' , "One Dot Leader" , '.' ) ,
79
- ( '۔' , "Arabic Full Stop" , '.' ) ,
80
113
( '܁' , "Syriac Supralinear Full Stop" , '.' ) ,
81
114
( '܂' , "Syriac Sublinear Full Stop" , '.' ) ,
82
115
( '꘎' , "Vai Full Stop" , '.' ) ,
83
116
( '𐩐' , "Kharoshthi Punctuation Dot" , '.' ) ,
84
- ( '·' , "Middle Dot" , '.' ) ,
85
117
( '٠' , "Arabic-Indic Digit Zero" , '.' ) ,
86
118
( '۰' , "Extended Arabic-Indic Digit Zero" , '.' ) ,
87
119
( 'ꓸ' , "Lisu Letter Tone Mya Ti" , '.' ) ,
88
- ( '。 ' , "Ideographic Full Stop " , '.' ) ,
120
+ ( '·' , "Middle Dot" , '.' ) ,
89
121
( '・' , "Katakana Middle Dot" , '.' ) ,
122
+ ( '･' , "Halfwidth Katakana Middle Dot" , '.' ) ,
123
+ ( '᛫' , "Runic Single Punctuation" , '.' ) ,
124
+ ( '\u{387}' , "Greek Ano Teleia" , '.' ) ,
125
+ ( '⸱' , "Word Separator Middle Dot" , '.' ) ,
126
+ ( '𐄁' , "Aegean Word Separator Dot" , '.' ) ,
127
+ ( '•' , "Bullet" , '.' ) ,
128
+ ( '‧' , "Hyphenation Point" , '.' ) ,
129
+ ( '∙' , "Bullet Operator" , '.' ) ,
130
+ ( '⋅' , "Dot Operator" , '.' ) ,
131
+ ( 'ꞏ' , "Latin Letter Sinological Dot" , '.' ) ,
132
+ ( 'ᐧ' , "Canadian Syllabics Final Middle Dot" , '.' ) ,
133
+ ( 'ᐧ' , "Canadian Syllabics Final Middle Dot" , '.' ) ,
134
+ ( '．' , "Fullwidth Full Stop" , '.' ) ,
135
+ ( '。' , "Ideographic Full Stop" , '.' ) ,
136
+ ( '︒' , "Presentation Form For Vertical Ideographic Full Stop" , '.' ) ,
137
+
90
138
( '՝' , "Armenian Comma" , '\'' ) ,
91
139
( '＇' , "Fullwidth Apostrophe" , '\'' ) ,
92
140
( '‘' , "Left Single Quotation Mark" , '\'' ) ,
@@ -96,15 +144,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
96
144
( '‵' , "Reversed Prime" , '\'' ) ,
97
145
( '՚' , "Armenian Apostrophe" , '\'' ) ,
98
146
( '׳' , "Hebrew Punctuation Geresh" , '\'' ) ,
147
+ ( '`' , "Greek Accent" , '\'' ) ,
99
148
( '\u{1fef}' , "Greek Varia" , '\'' ) ,
100
149
( '｀' , "Fullwidth Grave Accent" , '\'' ) ,
150
+ ( '´' , "Acute Accent" , '\'' ) ,
101
151
( '΄' , "Greek Tonos" , '\'' ) ,
102
152
( '\u{1ffd}' , "Greek Oxia" , '\'' ) ,
103
153
( '᾽' , "Greek Koronis" , '\'' ) ,
104
154
( '᾿' , "Greek Psili" , '\'' ) ,
105
155
( '῾' , "Greek Dasia" , '\'' ) ,
106
156
( 'ʹ' , "Modifier Letter Prime" , '\'' ) ,
107
157
( '\u{374}' , "Greek Numeral Sign" , '\'' ) ,
158
+ ( 'ˈ' , "Modifier Letter Vertical Line" , '\'' ) ,
108
159
( 'ˊ' , "Modifier Letter Acute Accent" , '\'' ) ,
109
160
( 'ˋ' , "Modifier Letter Grave Accent" , '\'' ) ,
110
161
( '˴' , "Modifier Letter Middle Grave Accent" , '\'' ) ,
@@ -116,6 +167,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
116
167
( 'י' , "Hebrew Letter Yod" , '\'' ) ,
117
168
( 'ߴ' , "Nko High Tone Apostrophe" , '\'' ) ,
118
169
( 'ߵ' , "Nko Low Tone Apostrophe" , '\'' ) ,
170
+ ( 'ᑊ' , "Canadian Syllabics West-Cree P" , '\'' ) ,
171
+ ( 'ᛌ' , "Runic Letter Short-Twig-Sol S" , '\'' ) ,
172
+ ( '𖽑' , "Miao Sign Aspiration" , '\'' ) ,
173
+ ( '𖽒' , "Miao Sign Reformed Voicing" , '\'' ) ,
174
+
175
+ ( '᳓' , "Vedic Sign Nihshvasa" , '"' ) ,
119
176
( '＂' , "Fullwidth Quotation Mark" , '"' ) ,
120
177
( '“' , "Left Double Quotation Mark" , '"' ) ,
121
178
( '”' , "Right Double Quotation Mark" , '"' ) ,
@@ -132,12 +189,15 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
132
189
( 'ײ' , "Hebrew Ligature Yiddish Double Yod" , '"' ) ,
133
190
( '❞' , "Heavy Double Comma Quotation Mark Ornament" , '"' ) ,
134
191
( '❝' , "Heavy Double Turned Comma Quotation Mark Ornament" , '"' ) ,
192
+
193
+ ( '（' , "Fullwidth Left Parenthesis" , '(' ) ,
135
194
( '❨' , "Medium Left Parenthesis Ornament" , '(' ) ,
136
195
( '﴾' , "Ornate Left Parenthesis" , '(' ) ,
137
- ( '(' , "Fullwidth Left Parenthesis" , '(' ) ,
196
+
197
+ ( '）' , "Fullwidth Right Parenthesis" , ')' ) ,
138
198
( '❩' , "Medium Right Parenthesis Ornament" , ')' ) ,
139
199
( '﴿' , "Ornate Right Parenthesis" , ')' ) ,
140
- ( ')' , "Fullwidth Right Parenthesis" , ')' ) ,
200
+
141
201
( '［' , "Fullwidth Left Square Bracket" , '[' ) ,
142
202
( '❲' , "Light Left Tortoise Shell Bracket Ornament" , '[' ) ,
143
203
( '「' , "Left Corner Bracket" , '[' ) ,
@@ -147,6 +207,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
147
207
( '〖' , "Left White Lenticular Bracket" , '[' ) ,
148
208
( '〘' , "Left White Tortoise Shell Bracket" , '[' ) ,
149
209
( '〚' , "Left White Square Bracket" , '[' ) ,
210
+
150
211
( '］' , "Fullwidth Right Square Bracket" , ']' ) ,
151
212
( '❳' , "Light Right Tortoise Shell Bracket Ornament" , ']' ) ,
152
213
( '」' , "Right Corner Bracket" , ']' ) ,
@@ -156,49 +217,94 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
156
217
( '〗' , "Right White Lenticular Bracket" , ']' ) ,
157
218
( '〙' , "Right White Tortoise Shell Bracket" , ']' ) ,
158
219
( '〛' , "Right White Square Bracket" , ']' ) ,
220
+
159
221
( '❴' , "Medium Left Curly Bracket Ornament" , '{' ) ,
222
+ ( '𝄔' , "Musical Symbol Brace" , '{' ) ,
223
+ ( '｛' , "Fullwidth Left Curly Bracket" , '{' ) ,
224
+
160
225
( '❵' , "Medium Right Curly Bracket Ornament" , '}' ) ,
226
+ ( '｝' , "Fullwidth Right Curly Bracket" , '}' ) ,
227
+
161
228
( '⁎' , "Low Asterisk" , '*' ) ,
162
229
( '٭' , "Arabic Five Pointed Star" , '*' ) ,
163
230
( '∗' , "Asterisk Operator" , '*' ) ,
231
+ ( '𐌟' , "Old Italic Letter Ess" , '*' ) ,
232
+ ( '＊' , "Fullwidth Asterisk" , '*' ) ,
233
+
164
234
( '᜵' , "Philippine Single Punctuation" , '/' ) ,
165
235
( '⁁' , "Caret Insertion Point" , '/' ) ,
166
236
( '∕' , "Division Slash" , '/' ) ,
167
237
( '⁄' , "Fraction Slash" , '/' ) ,
168
238
( '╱' , "Box Drawings Light Diagonal Upper Right To Lower Left" , '/' ) ,
169
239
( '⟋' , "Mathematical Rising Diagonal" , '/' ) ,
170
240
( '⧸' , "Big Solidus" , '/' ) ,
171
- ( '㇓' , "Cjk Stroke Sp" , '/' ) ,
241
+ ( '𝈺' , "Greek Instrumental Notation Symbol-47" , '/' ) ,
242
+ ( '㇓' , "CJK Stroke Sp" , '/' ) ,
172
243
( '〳' , "Vertical Kana Repeat Mark Upper Half" , '/' ) ,
173
- ( '丿' , "Cjk Unified Ideograph-4E3F" , '/' ) ,
244
+ ( 'Ⳇ' , "Coptic Capital Letter Old Coptic Esh" , '/' ) ,
245
+ ( 'ノ' , "Katakana Letter No" , '/' ) ,
246
+ ( '丿' , "CJK Unified Ideograph-4E3F" , '/' ) ,
174
247
( '⼃' , "Kangxi Radical Slash" , '/' ) ,
248
+ ( '／' , "Fullwidth Solidus" , '/' ) ,
249
+
175
250
( '＼' , "Fullwidth Reverse Solidus" , '\\' ) ,
176
251
( '﹨' , "Small Reverse Solidus" , '\\' ) ,
177
252
( '∖' , "Set Minus" , '\\' ) ,
178
253
( '⟍' , "Mathematical Falling Diagonal" , '\\' ) ,
179
254
( '⧵' , "Reverse Solidus Operator" , '\\' ) ,
180
255
( '⧹' , "Big Reverse Solidus" , '\\' ) ,
256
+ ( '\u{1d20f}' , "Greek Vocal Notation Symbol-16" , '\\' ) ,
257
+ ( '\u{1d23b}' , "Greek Instrumental Symbol-48" , '\\' ) ,
258
+ ( '㇔' , "CJK Stroke D" , '\\' ) ,
259
+ ( '丶' , "CJK Unified Ideograph-4E36" , '\\' ) ,
260
+ ( '⼂' , "Kangxi Radical Dot" , '\\' ) ,
181
261
( '、' , "Ideographic Comma" , '\\' ) ,
182
262
( 'ヽ' , "Katakana Iteration Mark" , '\\' ) ,
183
- ( '㇔' , "Cjk Stroke D" , '\\' ) ,
184
- ( '丶' , "Cjk Unified Ideograph-4E36" , '\\' ) ,
185
- ( '⼂' , "Kangxi Radical Dot" , '\\' ) ,
263
+
186
264
( 'ꝸ' , "Latin Small Letter Um" , '&' ) ,
265
+ ( '＆' , "Fullwidth Ampersand" , '&' ) ,
266
+
267
+ ( '᛭' , "Runic Cross Punctuation" , '+' ) ,
268
+ ( '➕' , "Heavy Plus Sign" , '+' ) ,
269
+ ( '𐊛' , "Lycian Letter H" , '+' ) ,
187
270
( '﬩' , "Hebrew Letter Alternative Plus Sign" , '+' ) ,
271
+ ( '＋' , "Fullwidth Plus Sign" , '+' ) ,
272
+
188
273
( '‹' , "Single Left-Pointing Angle Quotation Mark" , '<' ) ,
189
274
( '❮' , "Heavy Left-Pointing Angle Quotation Mark Ornament" , '<' ) ,
190
275
( '˂' , "Modifier Letter Left Arrowhead" , '<' ) ,
276
+ ( '𝈶' , "Greek Instrumental Symbol-40" , '<' ) ,
277
+ ( 'ᐸ' , "Canadian Syllabics Pa" , '<' ) ,
278
+ ( 'ᚲ' , "Runic Letter Kauna" , '<' ) ,
279
+ ( '❬' , "Medium Left-Pointing Angle Bracket Ornament" , '<' ) ,
280
+ ( '⟨' , "Mathematical Left Angle Bracket" , '<' ) ,
281
+ ( '\u{2329}' , "Left-Pointing Angle Bracket" , '<' ) ,
191
282
( '〈' , "Left Angle Bracket" , '<' ) ,
283
+ ( '㇛' , "CJK Stroke Pd" , '<' ) ,
284
+ ( 'く' , "Hiragana Letter Ku" , '<' ) ,
285
+ ( '𡿨' , "CJK Unified Ideograph-21FE8" , '<' ) ,
192
286
( '《' , "Left Double Angle Bracket" , '<' ) ,
287
+ ( '＜' , "Fullwidth Less-Than Sign" , '<' ) ,
288
+
289
+ ( '᐀' , "Canadian Syllabics Hyphen" , '=' ) ,
290
+ ( '⹀' , "Double Hyphen" , '=' ) ,
291
+ ( '゠' , "Katakana-Hiragana Double Hyphen" , '=' ) ,
193
292
( '꓿' , "Lisu Punctuation Full Stop" , '=' ) ,
293
+ ( '＝' , "Fullwidth Equals Sign" , '=' ) ,
294
+
194
295
( '›' , "Single Right-Pointing Angle Quotation Mark" , '>' ) ,
195
296
( '❯' , "Heavy Right-Pointing Angle Quotation Mark Ornament" , '>' ) ,
196
297
( '˃' , "Modifier Letter Right Arrowhead" , '>' ) ,
298
+ ( '𝈷' , "Greek Instrumental Symbol-42" , '>' ) ,
299
+ ( 'ᐳ' , "Canadian Syllabics Po" , '>' ) ,
300
+ ( '𖼿' , "Miao Letter Archaic Zza" , '>' ) ,
301
+ ( '❭' , "Medium Right-Pointing Angle Bracket Ornament" , '>' ) ,
302
+ ( '⟩' , "Mathematical Right Angle Bracket" , '>' ) ,
303
+ ( '\u{232a}' , "Right-Pointing Angle Bracket" , '>' ) ,
197
304
( '〉' , "Right Angle Bracket" , '>' ) ,
198
305
( '》' , "Right Double Angle Bracket" , '>' ) ,
199
- ( 'Ⲻ' , "Coptic Capital Letter Dialect-P Ni" , '-' ) ,
200
- ( 'Ɂ' , "Latin Capital Letter Glottal Stop" , '?' ) ,
201
- ( 'Ⳇ' , "Coptic Capital Letter Old Coptic Esh" , '/' ) , ] ;
306
+ ( '＞' , "Fullwidth Greater-Than Sign" , '>' ) , ] ;
307
+
202
308
203
309
const ASCII_ARRAY : & ' static [ ( char , & ' static str ) ] = & [
204
310
( ' ' , "Space" ) ,
0 commit comments