From b75021b31e43d59eb2b91fe45cfffcb7d25fb141 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 25 Jun 2019 21:02:19 +0300 Subject: [PATCH 1/2] refactor lexer to use idiomatic borrowing --- src/libsyntax/parse/lexer/mod.rs | 225 ++++++++++++++----------------- 1 file changed, 104 insertions(+), 121 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index ead5d543bec7d..7b9e3bc4d5102 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -321,33 +321,29 @@ impl<'a> StringReader<'a> { (pos - self.source_file.start_pos).to_usize() } - /// Calls `f` with a string slice of the source text spanning from `start` - /// up to but excluding `self.pos`, meaning the slice does not include - /// the character `self.ch`. - fn with_str_from(&self, start: BytePos, f: F) -> T - where F: FnOnce(&str) -> T + /// Slice of the source text from `start` up to but excluding `self.pos`, + /// meaning the slice does not include the character `self.ch`. + fn str_from(&self, start: BytePos) -> &str { - self.with_str_from_to(start, self.pos, f) + self.str_from_to(start, self.pos) } /// Creates a Name from a given offset to the current offset. fn name_from(&self, start: BytePos) -> ast::Name { debug!("taking an ident from {:?} to {:?}", start, self.pos); - self.with_str_from(start, Symbol::intern) + Symbol::intern(self.str_from(start)) } /// As name_from, with an explicit endpoint. fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { debug!("taking an ident from {:?} to {:?}", start, end); - self.with_str_from_to(start, end, Symbol::intern) + Symbol::intern(self.str_from_to(start, end)) } - /// Calls `f` with a string slice of the source text spanning from `start` - /// up to but excluding `end`. - fn with_str_from_to(&self, start: BytePos, end: BytePos, f: F) -> T - where F: FnOnce(&str) -> T + /// Slice of the source text spanning from `start` up to but excluding `end`. + fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { - f(&self.src[self.src_index(start)..self.src_index(end)]) + &self.src[self.src_index(start)..self.src_index(end)] } /// Converts CRLF to LF in the given string, raising an error on bare CR. @@ -456,8 +452,8 @@ impl<'a> StringReader<'a> { self.bump(); } - self.with_str_from(start, |string| { - if string == "_" { + match self.str_from(start) { + "_" => { self.sess.span_diagnostic .struct_span_warn(self.mk_sp(start, self.pos), "underscore literal suffix is not allowed") @@ -468,10 +464,9 @@ impl<'a> StringReader<'a> { ") .emit(); None - } else { - Some(Symbol::intern(string)) } - }) + name => Some(Symbol::intern(name)) + } } /// PRECONDITION: self.ch is not whitespace @@ -513,9 +508,7 @@ impl<'a> StringReader<'a> { } let kind = if doc_comment { - self.with_str_from(start_bpos, |string| { - token::DocComment(Symbol::intern(string)) - }) + token::DocComment(self.name_from(start_bpos)) } else { token::Comment }; @@ -615,23 +608,22 @@ impl<'a> StringReader<'a> { self.bump(); } - self.with_str_from(start_bpos, |string| { - // but comments with only "*"s between two "/"s are not - let kind = if is_block_doc_comment(string) { - let string = if has_cr { - self.translate_crlf(start_bpos, - string, - "bare CR not allowed in block doc-comment") - } else { - string.into() - }; - token::DocComment(Symbol::intern(&string[..])) + let string = self.str_from(start_bpos); + // but comments with only "*"s between two "/"s are not + let kind = if is_block_doc_comment(string) { + let string = if has_cr { + self.translate_crlf(start_bpos, + string, + "bare CR not allowed in block doc-comment") } else { - token::Comment + string.into() }; + token::DocComment(Symbol::intern(&string[..])) + } else { + token::Comment + }; - Some(Token::new(kind, self.mk_sp(start_bpos, self.pos))) - }) + Some(Token::new(kind, self.mk_sp(start_bpos, self.pos))) } /// Scan through any digits (base `scan_radix`) or underscores, @@ -838,20 +830,17 @@ impl<'a> StringReader<'a> { self.bump(); } - return Ok(self.with_str_from(start, |string| { - // FIXME: perform NFKC normalization here. (Issue #2253) - let name = ast::Name::intern(string); - - if is_raw_ident { - let span = self.mk_sp(raw_start, self.pos); - if !name.can_be_raw() { - self.err_span(span, &format!("`{}` cannot be a raw identifier", name)); - } - self.sess.raw_identifier_spans.borrow_mut().push(span); + // FIXME: perform NFKC normalization here. (Issue #2253) + let name = self.name_from(start); + if is_raw_ident { + let span = self.mk_sp(raw_start, self.pos); + if !name.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", name)); } + self.sess.raw_identifier_spans.borrow_mut().push(span); + } - token::Ident(name, is_raw_ident) - })); + return Ok(token::Ident(name, is_raw_ident)); } } @@ -1300,101 +1289,95 @@ impl<'a> StringReader<'a> { } fn validate_char_escape(&self, start_with_quote: BytePos) { - self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { - if let Err((off, err)) = unescape::unescape_char(lit) { + let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1)); + if let Err((off, err)) = unescape::unescape_char(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(start_with_quote, self.pos), + unescape::Mode::Char, + 0..off, + err, + ) + } + } + + fn validate_byte_escape(&self, start_with_quote: BytePos) { + let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1)); + if let Err((off, err)) = unescape::unescape_byte(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(start_with_quote, self.pos), + unescape::Mode::Byte, + 0..off, + err, + ) + } + } + + fn validate_str_escape(&self, start_with_quote: BytePos) { + let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1)); + unescape::unescape_str(lit, &mut |range, c| { + if let Err(err) = c { emit_unescape_error( &self.sess.span_diagnostic, lit, self.mk_sp(start_with_quote, self.pos), - unescape::Mode::Char, - 0..off, + unescape::Mode::Str, + range, err, ) } - }); + }) } - fn validate_byte_escape(&self, start_with_quote: BytePos) { - self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { - if let Err((off, err)) = unescape::unescape_byte(lit) { + fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_str(lit, &mut |range, c| { + if let Err(err) = c { emit_unescape_error( &self.sess.span_diagnostic, lit, - self.mk_sp(start_with_quote, self.pos), - unescape::Mode::Byte, - 0..off, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, err, ) } - }); - } - - fn validate_str_escape(&self, start_with_quote: BytePos) { - self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { - unescape::unescape_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(start_with_quote, self.pos), - unescape::Mode::Str, - range, - err, - ) - } - }) - }); - } - - fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { - self.with_str_from_to(content_start, content_end, |lit: &str| { - unescape::unescape_raw_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::Str, - range, - err, - ) - } - }) - }); + }) } fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { - self.with_str_from_to(content_start, content_end, |lit: &str| { - unescape::unescape_raw_byte_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::ByteStr, - range, - err, - ) - } - }) - }); + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) } fn validate_byte_str_escape(&self, start_with_quote: BytePos) { - self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| { - unescape::unescape_byte_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(start_with_quote, self.pos), - unescape::Mode::ByteStr, - range, - err, - ) - } - }) - }); + let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1)); + unescape::unescape_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(start_with_quote, self.pos), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) } } From 57db25e614dfc3ea3c407374a562501471eb1c5d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 25 Jun 2019 21:37:25 +0300 Subject: [PATCH 2/2] cleanup: rename name_from to symbol_from Lexer uses Symbols for a lot of stuff, not only for identifiers, so the "name" terminology is just confusing. --- src/libsyntax/parse/lexer/mod.rs | 39 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 7b9e3bc4d5102..4e4fe4256c9b0 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,4 +1,3 @@ -use crate::ast; use crate::parse::ParseSess; use crate::parse::token::{self, Token, TokenKind}; use crate::symbol::{sym, Symbol}; @@ -328,14 +327,14 @@ impl<'a> StringReader<'a> { self.str_from_to(start, self.pos) } - /// Creates a Name from a given offset to the current offset. - fn name_from(&self, start: BytePos) -> ast::Name { + /// Creates a Symbol from a given offset to the current offset. + fn symbol_from(&self, start: BytePos) -> Symbol { debug!("taking an ident from {:?} to {:?}", start, self.pos); Symbol::intern(self.str_from(start)) } - /// As name_from, with an explicit endpoint. - fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { + /// As symbol_from, with an explicit endpoint. + fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol { debug!("taking an ident from {:?} to {:?}", start, end); Symbol::intern(self.str_from_to(start, end)) } @@ -440,7 +439,7 @@ impl<'a> StringReader<'a> { } /// Eats *, if possible. - fn scan_optional_raw_name(&mut self) -> Option { + fn scan_optional_raw_name(&mut self) -> Option { if !ident_start(self.ch) { return None; } @@ -508,7 +507,7 @@ impl<'a> StringReader<'a> { } let kind = if doc_comment { - token::DocComment(self.name_from(start_bpos)) + token::DocComment(self.symbol_from(start_bpos)) } else { token::Comment }; @@ -537,7 +536,7 @@ impl<'a> StringReader<'a> { self.bump(); } return Some(Token::new( - token::Shebang(self.name_from(start)), + token::Shebang(self.symbol_from(start)), self.mk_sp(start, self.pos), )); } @@ -719,17 +718,17 @@ impl<'a> StringReader<'a> { let pos = self.pos; self.check_float_base(start_bpos, pos, base); - (token::Float, self.name_from(start_bpos)) + (token::Float, self.symbol_from(start_bpos)) } else { // it might be a float if it has an exponent if self.ch_is('e') || self.ch_is('E') { self.scan_float_exponent(); let pos = self.pos; self.check_float_base(start_bpos, pos, base); - return (token::Float, self.name_from(start_bpos)); + return (token::Float, self.symbol_from(start_bpos)); } // but we certainly have an integer! - (token::Integer, self.name_from(start_bpos)) + (token::Integer, self.symbol_from(start_bpos)) } } @@ -831,7 +830,7 @@ impl<'a> StringReader<'a> { } // FIXME: perform NFKC normalization here. (Issue #2253) - let name = self.name_from(start); + let name = self.symbol_from(start); if is_raw_ident { let span = self.mk_sp(raw_start, self.pos); if !name.can_be_raw() { @@ -1006,7 +1005,7 @@ impl<'a> StringReader<'a> { // lifetimes shouldn't end with a single quote // if we find one, then this is an invalid character literal if self.ch_is('\'') { - let symbol = self.name_from(start); + let symbol = self.symbol_from(start); self.bump(); self.validate_char_escape(start_with_quote); return Ok(TokenKind::lit(token::Char, symbol, None)); @@ -1024,7 +1023,7 @@ impl<'a> StringReader<'a> { // Include the leading `'` in the real identifier, for macro // expansion purposes. See #12512 for the gory details of why // this is necessary. - return Ok(token::Lifetime(self.name_from(start_with_quote))); + return Ok(token::Lifetime(self.symbol_from(start_with_quote))); } let msg = "unterminated character literal"; let symbol = self.scan_single_quoted_string(start_with_quote, msg); @@ -1052,7 +1051,7 @@ impl<'a> StringReader<'a> { }, Some('r') => { let (start, end, hash_count) = self.scan_raw_string(); - let symbol = self.name_from_to(start, end); + let symbol = self.symbol_from_to(start, end); self.validate_raw_byte_str_escape(start, end); (token::ByteStrRaw(hash_count), symbol) @@ -1073,7 +1072,7 @@ impl<'a> StringReader<'a> { } 'r' => { let (start, end, hash_count) = self.scan_raw_string(); - let symbol = self.name_from_to(start, end); + let symbol = self.symbol_from_to(start, end); self.validate_raw_str_escape(start, end); let suffix = self.scan_optional_raw_name(); @@ -1174,7 +1173,7 @@ impl<'a> StringReader<'a> { fn scan_single_quoted_string(&mut self, start_with_quote: BytePos, - unterminated_msg: &str) -> ast::Name { + unterminated_msg: &str) -> Symbol { // assumes that first `'` is consumed let start = self.pos; // lex `'''` as a single char, for recovery @@ -1206,12 +1205,12 @@ impl<'a> StringReader<'a> { } } - let id = self.name_from(start); + let id = self.symbol_from(start); self.bump(); id } - fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name { + fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> Symbol { debug_assert!(self.ch_is('\"')); let start_with_quote = self.pos; self.bump(); @@ -1226,7 +1225,7 @@ impl<'a> StringReader<'a> { } self.bump(); } - let id = self.name_from(start); + let id = self.symbol_from(start); self.bump(); id }