|
| 1 | +// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT |
| 2 | +// file at the top-level directory of this distribution and at |
| 3 | +// http://rust-lang.org/COPYRIGHT. |
| 4 | +// |
| 5 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | +// option. This file may not be copied, modified, or distributed |
| 9 | +// except according to those terms. |
| 10 | + |
| 11 | +// Characters and their corresponding confusables were collected from |
| 12 | +// http://www.unicode.org/Public/security/revision-06/confusables.txt |
| 13 | + |
| 14 | +use codemap::mk_sp as make_span; |
| 15 | +use super::StringReader; |
| 16 | + |
/// Unicode characters that are easily mistaken for an ASCII character.
///
/// Each entry is `(unicode character, name of that character, ASCII look-alike)`.
/// The table is searched front to back by its first field, so every source
/// character must appear only once.
///
/// Several source characters are spelled as `\u{...}` escapes on purpose: they
/// are compatibility (fullwidth) forms or canonical singletons, and text
/// normalization silently folds them into the very ASCII / base characters
/// they are meant to be distinguished from.
///
/// NOTE(review): the fullwidth square brackets are mapped to `(` / `)`;
/// `ASCII_ARRAY` has no `[` / `]` entries — confirm this is intended.
const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('ߺ', "Nko Lajanyalan", '_'),
    ('﹍', "Dashed Low Line", '_'),
    ('﹎', "Centreline Low Line", '_'),
    ('﹏', "Wavy Low Line", '_'),
    ('‐', "Hyphen", '-'),
    ('‑', "Non-Breaking Hyphen", '-'),
    ('‒', "Figure Dash", '-'),
    ('–', "En Dash", '-'),
    ('﹘', "Small Em Dash", '-'),
    ('⁃', "Hyphen Bullet", '-'),
    ('˗', "Modifier Letter Minus Sign", '-'),
    ('−', "Minus Sign", '-'),
    ('٫', "Arabic Decimal Separator", ','),
    ('‚', "Single Low-9 Quotation Mark", ','),
    ('ꓹ', "Lisu Letter Tone Na Po", ','),
    ('\u{37E}', "Greek Question Mark", ';'),
    ('ः', "Devanagari Sign Visarga", ':'),
    ('ઃ', "Gujarati Sign Visarga", ':'),
    ('\u{FF1A}', "Fullwidth Colon", ':'),
    ('։', "Armenian Full Stop", ':'),
    ('܃', "Syriac Supralinear Colon", ':'),
    ('܄', "Syriac Sublinear Colon", ':'),
    ('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
    ('᠃', "Mongolian Full Stop", ':'),
    ('᠉', "Mongolian Manchu Full Stop", ':'),
    ('⁚', "Two Dot Punctuation", ':'),
    ('׃', "Hebrew Punctuation Sof Pasuq", ':'),
    ('˸', "Modifier Letter Raised Colon", ':'),
    ('꞉', "Modifier Letter Colon", ':'),
    ('∶', "Ratio", ':'),
    ('ː', "Modifier Letter Triangular Colon", ':'),
    ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
    ('\u{FF01}', "Fullwidth Exclamation Mark", '!'),
    ('ǃ', "Latin Letter Retroflex Click", '!'),
    ('ʔ', "Latin Letter Glottal Stop", '?'),
    ('ॽ', "Devanagari Letter Glottal Stop", '?'),
    ('Ꭾ', "Cherokee Letter He", '?'),
    ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
    ('․', "One Dot Leader", '.'),
    ('۔', "Arabic Full Stop", '.'),
    ('܁', "Syriac Supralinear Full Stop", '.'),
    ('܂', "Syriac Sublinear Full Stop", '.'),
    ('꘎', "Vai Full Stop", '.'),
    ('𐩐', "Kharoshthi Punctuation Dot", '.'),
    ('٠', "Arabic-Indic Digit Zero", '.'),
    ('۰', "Extended Arabic-Indic Digit Zero", '.'),
    ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
    ('՝', "Armenian Comma", '\''),
    ('\u{FF07}', "Fullwidth Apostrophe", '\''),
    ('‘', "Left Single Quotation Mark", '\''),
    ('’', "Right Single Quotation Mark", '\''),
    ('‛', "Single High-Reversed-9 Quotation Mark", '\''),
    ('′', "Prime", '\''),
    ('‵', "Reversed Prime", '\''),
    ('՚', "Armenian Apostrophe", '\''),
    ('׳', "Hebrew Punctuation Geresh", '\''),
    ('\u{1FEF}', "Greek Varia", '\''),
    ('\u{FF40}', "Fullwidth Grave Accent", '\''),
    ('΄', "Greek Tonos", '\''),
    ('\u{1FFD}', "Greek Oxia", '\''),
    ('᾽', "Greek Koronis", '\''),
    ('᾿', "Greek Psili", '\''),
    ('῾', "Greek Dasia", '\''),
    ('ʹ', "Modifier Letter Prime", '\''),
    ('\u{374}', "Greek Numeral Sign", '\''),
    ('ˊ', "Modifier Letter Acute Accent", '\''),
    ('ˋ', "Modifier Letter Grave Accent", '\''),
    ('˴', "Modifier Letter Middle Grave Accent", '\''),
    ('ʻ', "Modifier Letter Turned Comma", '\''),
    ('ʽ', "Modifier Letter Reversed Comma", '\''),
    ('ʼ', "Modifier Letter Apostrophe", '\''),
    ('ʾ', "Modifier Letter Right Half Ring", '\''),
    ('ꞌ', "Latin Small Letter Saltillo", '\''),
    ('י', "Hebrew Letter Yod", '\''),
    ('ߴ', "Nko High Tone Apostrophe", '\''),
    ('ߵ', "Nko Low Tone Apostrophe", '\''),
    ('\u{FF3B}', "Fullwidth Left Square Bracket", '('),
    ('❨', "Medium Left Parenthesis Ornament", '('),
    ('❲', "Light Left Tortoise Shell Bracket Ornament", '('),
    ('〔', "Left Tortoise Shell Bracket", '('),
    ('﴾', "Ornate Left Parenthesis", '('),
    ('\u{FF3D}', "Fullwidth Right Square Bracket", ')'),
    ('❩', "Medium Right Parenthesis Ornament", ')'),
    ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'),
    ('〕', "Right Tortoise Shell Bracket", ')'),
    ('﴿', "Ornate Right Parenthesis", ')'),
    ('❴', "Medium Left Curly Bracket Ornament", '{'),
    ('❵', "Medium Right Curly Bracket Ornament", '}'),
    ('⁎', "Low Asterisk", '*'),
    ('٭', "Arabic Five Pointed Star", '*'),
    ('∗', "Asterisk Operator", '*'),
    ('᜵', "Philippine Single Punctuation", '/'),
    ('⁁', "Caret Insertion Point", '/'),
    ('∕', "Division Slash", '/'),
    ('⁄', "Fraction Slash", '/'),
    ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
    ('⟋', "Mathematical Rising Diagonal", '/'),
    ('⧸', "Big Solidus", '/'),
    ('㇓', "Cjk Stroke Sp", '/'),
    ('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
    ('丿', "Cjk Unified Ideograph-4E3F", '/'),
    ('⼃', "Kangxi Radical Slash", '/'),
    ('\u{FF3C}', "Fullwidth Reverse Solidus", '\\'),
    ('﹨', "Small Reverse Solidus", '\\'),
    ('∖', "Set Minus", '\\'),
    ('⟍', "Mathematical Falling Diagonal", '\\'),
    ('⧵', "Reverse Solidus Operator", '\\'),
    ('⧹', "Big Reverse Solidus", '\\'),
    ('㇔', "Cjk Stroke D", '\\'),
    ('丶', "Cjk Unified Ideograph-4E36", '\\'),
    ('⼂', "Kangxi Radical Dot", '\\'),
    ('ꝸ', "Latin Small Letter Um", '&'),
    ('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
    ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
    ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
    ('˂', "Modifier Letter Left Arrowhead", '<'),
    ('꓿', "Lisu Punctuation Full Stop", '='),
    ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
    ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
    ('˃', "Modifier Letter Right Arrowhead", '>'),
    ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
    ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
];
| 141 | + |
/// Human-readable names for the ASCII characters that appear as look-alike
/// targets in `UNICODE_ARRAY`.
///
/// Each entry is `(ascii character, display name)`; the display name is what
/// gets interpolated into the diagnostic shown to the user.
const ASCII_ARRAY: &'static [(char, &'static str)] = &[
    // Word-like punctuation.
    ('_', "Underscore"), ('-', "Minus/Hyphen"),
    // Separators and sentence punctuation.
    (',', "Comma"), (';', "Semicolon"), (':', "Colon"),
    ('!', "Exclamation Mark"), ('?', "Question Mark"), ('.', "Period"),
    ('\'', "Single Quote"),
    // Delimiters.
    ('(', "Left Parenthesis"), (')', "Right Parenthesis"),
    ('{', "Left Curly Brace"), ('}', "Right Curly Brace"),
    // Operators.
    ('*', "Asterisk"), ('/', "Slash"), ('\\', "Backslash"),
    ('&', "Ampersand"), ('+', "Plus Sign"),
    ('<', "Less-Than Sign"), ('=', "Equals Sign"), ('>', "Greater-Than Sign"),
];
| 164 | + |
| 165 | +pub fn check_for_substitution(reader: &StringReader, ch: char) { |
| 166 | + UNICODE_ARRAY |
| 167 | + .iter() |
| 168 | + .find(|&&(c, _, _)| c == ch) |
| 169 | + .map(|&(_, u_name, ascii_char)| { |
| 170 | + let span = make_span(reader.last_pos, reader.pos); |
| 171 | + match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) { |
| 172 | + Some(&(ascii_char, ascii_name)) => { |
| 173 | + let msg = |
| 174 | + format!("unicode character '{}' ({}) looks much like '{}' ({}), but it's not", |
| 175 | + ch, u_name, ascii_char, ascii_name); |
| 176 | + reader.help_span(span, &msg); |
| 177 | + }, |
| 178 | + None => { |
| 179 | + reader |
| 180 | + .span_diagnostic |
| 181 | + .span_bug_no_panic(span, |
| 182 | + &format!("substitution character not found for '{}'", ch)); |
| 183 | + } |
| 184 | + } |
| 185 | + }); |
| 186 | +} |
0 commit comments