From dbe700ef656b63a8c630dac41c46f1b3e3323727 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Thu, 21 Apr 2016 17:51:47 +0800 Subject: [PATCH 1/4] add more characters easily inputtable with CJK IMEs --- src/libsyntax/parse/lexer/unicode_chars.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 1d32dd4973127..f74746fe88576 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -24,14 +24,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('‑', "Non-Breaking Hyphen", '-'), ('‒', "Figure Dash", '-'), ('–', "En Dash", '-'), + ('—', "Em Dash", '-'), ('﹘', "Small Em Dash", '-'), ('⁃', "Hyphen Bullet", '-'), ('˗', "Modifier Letter Minus Sign", '-'), ('−', "Minus Sign", '-'), + ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'), ('٫', "Arabic Decimal Separator", ','), ('‚', "Single Low-9 Quotation Mark", ','), ('ꓹ', "Lisu Letter Tone Na Po", ','), + (',', "Fullwidth Comma", ','), (';', "Greek Question Mark", ';'), + (';', "Fullwidth Semicolon", ';'), ('ः', "Devanagari Sign Visarga", ':'), ('ઃ', "Gujarati Sign Visarga", ':'), (':', "Fullwidth Colon", ':'), @@ -53,6 +57,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('ʔ', "Latin Letter Glottal Stop", '?'), ('ॽ', "Devanagari Letter Glottal Stop", '?'), ('Ꭾ', "Cherokee Letter He", '?'), + ('?', "Fullwidth Question Mark", '?'), ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'), ('․', "One Dot Leader", '.'), ('۔', "Arabic Full Stop", '.'), @@ -60,9 +65,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('܂', "Syriac Sublinear Full Stop", '.'), ('꘎', "Vai Full Stop", '.'), ('𐩐', "Kharoshthi Punctuation Dot", '.'), + ('·', "Middle Dot", '.'), ('٠', "Arabic-Indic Digit Zero", '.'), ('۰', "Extended Arabic-Indic Digit Zero", '.'), ('ꓸ', "Lisu Letter Tone Mya Ti", '.'), + ('。', "Ideographic Full Stop", 
'.'), + ('・', "Katakana Middle Dot", '.'), ('՝', "Armenian Comma", '\''), (''', "Fullwidth Apostrophe", '\''), ('‘', "Left Single Quotation Mark", '\''), @@ -113,11 +121,13 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('❲', "Light Left Tortoise Shell Bracket Ornament", '('), ('〔', "Left Tortoise Shell Bracket", '('), ('﴾', "Ornate Left Parenthesis", '('), + ('(', "Fullwidth Left Parenthesis", '('), (']', "Fullwidth Right Square Bracket", ')'), ('❩', "Medium Right Parenthesis Ornament", ')'), ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'), ('〕', "Right Tortoise Shell Bracket", ')'), ('﴿', "Ornate Right Parenthesis", ')'), + (')', "Fullwidth Right Parenthesis", ')'), ('❴', "Medium Left Curly Bracket Ornament", '{'), ('❵', "Medium Right Curly Bracket Ornament", '}'), ('⁎', "Low Asterisk", '*'), @@ -140,6 +150,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('⟍', "Mathematical Falling Diagonal", '\\'), ('⧵', "Reverse Solidus Operator", '\\'), ('⧹', "Big Reverse Solidus", '\\'), + ('、', "Ideographic Comma", '\\'), + ('ヽ', "Katakana Iteration Mark", '\\'), ('㇔', "Cjk Stroke D", '\\'), ('丶', "Cjk Unified Ideograph-4E36", '\\'), ('⼂', "Kangxi Radical Dot", '\\'), @@ -148,10 +160,14 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('‹', "Single Left-Pointing Angle Quotation Mark", '<'), ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'), ('˂', "Modifier Letter Left Arrowhead", '<'), + ('〈', "Left Angle Bracket", '<'), + ('《', "Left Double Angle Bracket", '<'), ('꓿', "Lisu Punctuation Full Stop", '='), ('›', "Single Right-Pointing Angle Quotation Mark", '>'), ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'), ('˃', "Modifier Letter Right Arrowhead", '>'), + ('〉', "Right Angle Bracket", '>'), + ('》', "Right Double Angle Bracket", '>'), ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'), ('Ɂ', "Latin Capital Letter Glottal Stop", '?'), ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", 
'/'), ]; From 5f70e8f6cdf7392f70db1ee1a1e0668d056ab527 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Thu, 21 Apr 2016 20:05:47 +0800 Subject: [PATCH 2/4] add confusable space characters --- src/libsyntax/parse/lexer/unicode_chars.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index f74746fe88576..96cfb4dfb2dac 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -16,6 +16,22 @@ use errors::DiagnosticBuilder; use super::StringReader; const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ + (' ', "No-Break Space", ' '), + (' ', "Ogham Space Mark", ' '), + (' ', "En Quad", ' '), + (' ', "Em Quad", ' '), + (' ', "En Space", ' '), + (' ', "Em Space", ' '), + (' ', "Three-Per-Em Space", ' '), + (' ', "Four-Per-Em Space", ' '), + (' ', "Six-Per-Em Space", ' '), + (' ', "Figure Space", ' '), + (' ', "Punctuation Space", ' '), + (' ', "Thin Space", ' '), + (' ', "Hair Space", ' '), + (' ', "Narrow No-Break Space", ' '), + (' ', "Medium Mathematical Space", ' '), + (' ', "Ideographic Space", ' '), ('ߺ', "Nko Lajanyalan", '_'), ('﹍', "Dashed Low Line", '_'), ('﹎', "Centreline Low Line", '_'), @@ -173,6 +189,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ]; const ASCII_ARRAY: &'static [(char, &'static str)] = &[ + (' ', "Space"), ('_', "Underscore"), ('-', "Minus/Hyphen"), (',', "Comma"), From 47d5c90fbe7d0ccc30a1ab415355755ff104bc9a Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Thu, 21 Apr 2016 20:09:26 +0800 Subject: [PATCH 3/4] correct aliases for square brackets --- src/libsyntax/parse/lexer/unicode_chars.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 96cfb4dfb2dac..59133d62d5293 100644 --- 
a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -132,18 +132,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'), ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'), ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'), - ('[', "Fullwidth Left Square Bracket", '('), ('❨', "Medium Left Parenthesis Ornament", '('), - ('❲', "Light Left Tortoise Shell Bracket Ornament", '('), - ('〔', "Left Tortoise Shell Bracket", '('), ('﴾', "Ornate Left Parenthesis", '('), ('(', "Fullwidth Left Parenthesis", '('), - (']', "Fullwidth Right Square Bracket", ')'), ('❩', "Medium Right Parenthesis Ornament", ')'), - ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'), - ('〕', "Right Tortoise Shell Bracket", ')'), ('﴿', "Ornate Right Parenthesis", ')'), (')', "Fullwidth Right Parenthesis", ')'), + ('[', "Fullwidth Left Square Bracket", '['), + ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), + ('〔', "Left Tortoise Shell Bracket", '['), + (']', "Fullwidth Right Square Bracket", ']'), + ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), + ('〕', "Right Tortoise Shell Bracket", ']'), ('❴', "Medium Left Curly Bracket Ornament", '{'), ('❵', "Medium Right Curly Bracket Ornament", '}'), ('⁎', "Low Asterisk", '*'), @@ -202,6 +202,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[ ('"', "Quotation Mark"), ('(', "Left Parenthesis"), (')', "Right Parenthesis"), + ('[', "Left Square Bracket"), + (']', "Right Square Bracket"), ('{', "Left Curly Brace"), ('}', "Right Curly Brace"), ('*', "Asterisk"), From 496081c5c7bdf3afc2e444c166ee875b4f9041e5 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Thu, 21 Apr 2016 20:17:51 +0800 Subject: [PATCH 4/4] add more confusable CJK square bracket aliases --- src/libsyntax/parse/lexer/unicode_chars.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 59133d62d5293..d337c78bee8b5 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -140,10 +140,22 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ (')', "Fullwidth Right Parenthesis", ')'), ('[', "Fullwidth Left Square Bracket", '['), ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), + ('「', "Left Corner Bracket", '['), + ('『', "Left White Corner Bracket", '['), + ('【', "Left Black Lenticular Bracket", '['), ('〔', "Left Tortoise Shell Bracket", '['), + ('〖', "Left White Lenticular Bracket", '['), + ('〘', "Left White Tortoise Shell Bracket", '['), + ('〚', "Left White Square Bracket", '['), (']', "Fullwidth Right Square Bracket", ']'), ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), + ('」', "Right Corner Bracket", ']'), + ('』', "Right White Corner Bracket", ']'), + ('】', "Right Black Lenticular Bracket", ']'), ('〕', "Right Tortoise Shell Bracket", ']'), + ('〗', "Right White Lenticular Bracket", ']'), + ('〙', "Right White Tortoise Shell Bracket", ']'), + ('〛', "Right White Square Bracket", ']'), ('❴', "Medium Left Curly Bracket Ornament", '{'), ('❵', "Medium Right Curly Bracket Ornament", '}'), ('⁎', "Low Asterisk", '*'),