Skip to content

Commit 6844976

Browse files
committed
review comments: add FIXME comments and formatting
1 parent 70c817a commit 6844976

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

src/libsyntax/parse/lexer/mod.rs

+11-4
Original file line numberDiff line numberDiff line change
@@ -389,11 +389,18 @@ impl<'a> StringReader<'a> {
389389
self.pos,
390390
"unknown start of token",
391391
c);
392-
if let Some(t) = unicode_chars::check_for_substitution(self, start, c, &mut err) {
393-
err.emit();
394-
return Ok(t);
392+
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs into
393+
// the token, instead of keeping a table in `check_for_substitution`. Ideally,
394+
// this should be inside `rustc_lexer`. However, we should first remove compound
395+
// tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
396+
// as there will be less overall work to do this way.
397+
return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
398+
Some(token) => {
399+
err.emit();
400+
Ok(token)
401+
}
402+
None => Err(err),
395403
}
396-
return Err(err)
397404
}
398405
};
399406
Ok(kind)

src/libsyntax/parse/lexer/unicode_chars.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
use super::StringReader;
55
use errors::{Applicability, DiagnosticBuilder};
6-
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
6+
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw};
77
use crate::parse::token;
88

99
#[rustfmt::skip] // for line breaks
@@ -298,18 +298,20 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
298298
('>', "Fullwidth Greater-Than Sign", '>'),
299299
];
300300

301+
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs into tokens,
302+
// instead of keeping the substitution token in this table. Ideally, this should be inside
303+
// `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`,
304+
// and then add fancier error recovery to it, as there will be less overall work to do this way.
301305
const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
302306
(' ', "Space", Some(token::Whitespace)),
303-
('_', "Underscore", None),
307+
('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
304308
('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
305309
(',', "Comma", Some(token::Comma)),
306310
(';', "Semicolon", Some(token::Semi)),
307311
(':', "Colon", Some(token::Colon)),
308312
('!', "Exclamation Mark", Some(token::Not)),
309313
('?', "Question Mark", Some(token::Question)),
310314
('.', "Period", Some(token::Dot)),
311-
('\'', "Single Quote", None), // Literals are already lexed by this point, so we can't recover
312-
('"', "Quotation Mark", None), // gracefully just by spitting the correct token out.
313315
('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
314316
(')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
315317
('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
@@ -324,6 +326,10 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
324326
('<', "Less-Than Sign", Some(token::Lt)),
325327
('=', "Equals Sign", Some(token::Eq)),
326328
('>', "Greater-Than Sign", Some(token::Gt)),
329+
// FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
330+
// spitting the correct token out.
331+
('\'', "Single Quote", None),
332+
('"', "Quotation Mark", None),
327333
];
328334

329335
crate fn check_for_substitution<'a>(

0 commit comments

Comments
 (0)