From b75021b31e43d59eb2b91fe45cfffcb7d25fb141 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Tue, 25 Jun 2019 21:02:19 +0300
Subject: [PATCH 1/2] refactor lexer to use idiomatic borrowing

---
 src/libsyntax/parse/lexer/mod.rs | 225 ++++++++++++++-----------------
 1 file changed, 104 insertions(+), 121 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index ead5d543bec7d..7b9e3bc4d5102 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -321,33 +321,29 @@ impl<'a> StringReader<'a> {
         (pos - self.source_file.start_pos).to_usize()
     }
 
-    /// Calls `f` with a string slice of the source text spanning from `start`
-    /// up to but excluding `self.pos`, meaning the slice does not include
-    /// the character `self.ch`.
-    fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
-        where F: FnOnce(&str) -> T
+    /// Slice of the source text from `start` up to but excluding `self.pos`,
+    /// meaning the slice does not include the character `self.ch`.
+    fn str_from(&self, start: BytePos) -> &str
     {
-        self.with_str_from_to(start, self.pos, f)
+        self.str_from_to(start, self.pos)
     }
 
     /// Creates a Name from a given offset to the current offset.
     fn name_from(&self, start: BytePos) -> ast::Name {
         debug!("taking an ident from {:?} to {:?}", start, self.pos);
-        self.with_str_from(start, Symbol::intern)
+        Symbol::intern(self.str_from(start))
     }
 
     /// As name_from, with an explicit endpoint.
     fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
         debug!("taking an ident from {:?} to {:?}", start, end);
-        self.with_str_from_to(start, end, Symbol::intern)
+        Symbol::intern(self.str_from_to(start, end))
     }
 
-    /// Calls `f` with a string slice of the source text spanning from `start`
-    /// up to but excluding `end`.
-    fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
-        where F: FnOnce(&str) -> T
+    /// Slice of the source text spanning from `start` up to but excluding `end`.
+    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
     {
-        f(&self.src[self.src_index(start)..self.src_index(end)])
+        &self.src[self.src_index(start)..self.src_index(end)]
     }
 
     /// Converts CRLF to LF in the given string, raising an error on bare CR.
@@ -456,8 +452,8 @@ impl<'a> StringReader<'a> {
             self.bump();
         }
 
-        self.with_str_from(start, |string| {
-            if string == "_" {
+        match self.str_from(start) {
+            "_" => {
                 self.sess.span_diagnostic
                     .struct_span_warn(self.mk_sp(start, self.pos),
                                       "underscore literal suffix is not allowed")
@@ -468,10 +464,9 @@ impl<'a> StringReader<'a> {
                           <https://github.com/rust-lang/rust/issues/42326>")
                     .emit();
                 None
-            } else {
-                Some(Symbol::intern(string))
             }
-        })
+            name => Some(Symbol::intern(name))
+        }
     }
 
     /// PRECONDITION: self.ch is not whitespace
@@ -513,9 +508,7 @@ impl<'a> StringReader<'a> {
                     }
 
                     let kind = if doc_comment {
-                        self.with_str_from(start_bpos, |string| {
-                            token::DocComment(Symbol::intern(string))
-                        })
+                        token::DocComment(self.name_from(start_bpos))
                     } else {
                         token::Comment
                     };
@@ -615,23 +608,22 @@ impl<'a> StringReader<'a> {
             self.bump();
         }
 
-        self.with_str_from(start_bpos, |string| {
-            // but comments with only "*"s between two "/"s are not
-            let kind = if is_block_doc_comment(string) {
-                let string = if has_cr {
-                    self.translate_crlf(start_bpos,
-                                        string,
-                                        "bare CR not allowed in block doc-comment")
-                } else {
-                    string.into()
-                };
-                token::DocComment(Symbol::intern(&string[..]))
+        let string = self.str_from(start_bpos);
+        // but comments with only "*"s between two "/"s are not
+        let kind = if is_block_doc_comment(string) {
+            let string = if has_cr {
+                self.translate_crlf(start_bpos,
+                                    string,
+                                    "bare CR not allowed in block doc-comment")
             } else {
-                token::Comment
+                string.into()
             };
+            token::DocComment(Symbol::intern(&string[..]))
+        } else {
+            token::Comment
+        };
 
-            Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
-        })
+        Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
     }
 
     /// Scan through any digits (base `scan_radix`) or underscores,
@@ -838,20 +830,17 @@ impl<'a> StringReader<'a> {
                     self.bump();
                 }
 
-                return Ok(self.with_str_from(start, |string| {
-                    // FIXME: perform NFKC normalization here. (Issue #2253)
-                    let name = ast::Name::intern(string);
-
-                    if is_raw_ident {
-                        let span = self.mk_sp(raw_start, self.pos);
-                        if !name.can_be_raw() {
-                            self.err_span(span, &format!("`{}` cannot be a raw identifier", name));
-                        }
-                        self.sess.raw_identifier_spans.borrow_mut().push(span);
+                // FIXME: perform NFKC normalization here. (Issue #2253)
+                let name = self.name_from(start);
+                if is_raw_ident {
+                    let span = self.mk_sp(raw_start, self.pos);
+                    if !name.can_be_raw() {
+                        self.err_span(span, &format!("`{}` cannot be a raw identifier", name));
                     }
+                    self.sess.raw_identifier_spans.borrow_mut().push(span);
+                }
 
-                    token::Ident(name, is_raw_ident)
-                }));
+                return Ok(token::Ident(name, is_raw_ident));
             }
         }
 
@@ -1300,101 +1289,95 @@ impl<'a> StringReader<'a> {
     }
 
     fn validate_char_escape(&self, start_with_quote: BytePos) {
-        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
-            if let Err((off, err)) = unescape::unescape_char(lit) {
+        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
+        if let Err((off, err)) = unescape::unescape_char(lit) {
+            emit_unescape_error(
+                &self.sess.span_diagnostic,
+                lit,
+                self.mk_sp(start_with_quote, self.pos),
+                unescape::Mode::Char,
+                0..off,
+                err,
+            )
+        }
+    }
+
+    fn validate_byte_escape(&self, start_with_quote: BytePos) {
+        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
+        if let Err((off, err)) = unescape::unescape_byte(lit) {
+            emit_unescape_error(
+                &self.sess.span_diagnostic,
+                lit,
+                self.mk_sp(start_with_quote, self.pos),
+                unescape::Mode::Byte,
+                0..off,
+                err,
+            )
+        }
+    }
+
+    fn validate_str_escape(&self, start_with_quote: BytePos) {
+        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
+        unescape::unescape_str(lit, &mut |range, c| {
+            if let Err(err) = c {
                 emit_unescape_error(
                     &self.sess.span_diagnostic,
                     lit,
                     self.mk_sp(start_with_quote, self.pos),
-                    unescape::Mode::Char,
-                    0..off,
+                    unescape::Mode::Str,
+                    range,
                     err,
                 )
             }
-        });
+        })
     }
 
-    fn validate_byte_escape(&self, start_with_quote: BytePos) {
-        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
-            if let Err((off, err)) = unescape::unescape_byte(lit) {
+    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
+        let lit = self.str_from_to(content_start, content_end);
+        unescape::unescape_raw_str(lit, &mut |range, c| {
+            if let Err(err) = c {
                 emit_unescape_error(
                     &self.sess.span_diagnostic,
                     lit,
-                    self.mk_sp(start_with_quote, self.pos),
-                    unescape::Mode::Byte,
-                    0..off,
+                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
+                    unescape::Mode::Str,
+                    range,
                     err,
                 )
             }
-        });
-    }
-
-    fn validate_str_escape(&self, start_with_quote: BytePos) {
-        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
-            unescape::unescape_str(lit, &mut |range, c| {
-                if let Err(err) = c {
-                    emit_unescape_error(
-                        &self.sess.span_diagnostic,
-                        lit,
-                        self.mk_sp(start_with_quote, self.pos),
-                        unescape::Mode::Str,
-                        range,
-                        err,
-                    )
-                }
-            })
-        });
-    }
-
-    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-        self.with_str_from_to(content_start, content_end, |lit: &str| {
-            unescape::unescape_raw_str(lit, &mut |range, c| {
-                if let Err(err) = c {
-                    emit_unescape_error(
-                        &self.sess.span_diagnostic,
-                        lit,
-                        self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
-                        unescape::Mode::Str,
-                        range,
-                        err,
-                    )
-                }
-            })
-        });
+        })
     }
 
     fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-        self.with_str_from_to(content_start, content_end, |lit: &str| {
-            unescape::unescape_raw_byte_str(lit, &mut |range, c| {
-                if let Err(err) = c {
-                    emit_unescape_error(
-                        &self.sess.span_diagnostic,
-                        lit,
-                        self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
-                        unescape::Mode::ByteStr,
-                        range,
-                        err,
-                    )
-                }
-            })
-        });
+        let lit = self.str_from_to(content_start, content_end);
+        unescape::unescape_raw_byte_str(lit, &mut |range, c| {
+            if let Err(err) = c {
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit,
+                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
+                    unescape::Mode::ByteStr,
+                    range,
+                    err,
+                )
+            }
+        })
     }
 
     fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
-        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
-            unescape::unescape_byte_str(lit, &mut |range, c| {
-                if let Err(err) = c {
-                    emit_unescape_error(
-                        &self.sess.span_diagnostic,
-                        lit,
-                        self.mk_sp(start_with_quote, self.pos),
-                        unescape::Mode::ByteStr,
-                        range,
-                        err,
-                    )
-                }
-            })
-        });
+        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
+        unescape::unescape_byte_str(lit, &mut |range, c| {
+            if let Err(err) = c {
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit,
+                    self.mk_sp(start_with_quote, self.pos),
+                    unescape::Mode::ByteStr,
+                    range,
+                    err,
+                )
+            }
+        })
     }
 }
 

From 57db25e614dfc3ea3c407374a562501471eb1c5d Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Tue, 25 Jun 2019 21:37:25 +0300
Subject: [PATCH 2/2] cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.
---
 src/libsyntax/parse/lexer/mod.rs | 39 ++++++++++++++++----------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 7b9e3bc4d5102..4e4fe4256c9b0 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -1,4 +1,3 @@
-use crate::ast;
 use crate::parse::ParseSess;
 use crate::parse::token::{self, Token, TokenKind};
 use crate::symbol::{sym, Symbol};
@@ -328,14 +327,14 @@ impl<'a> StringReader<'a> {
         self.str_from_to(start, self.pos)
     }
 
-    /// Creates a Name from a given offset to the current offset.
-    fn name_from(&self, start: BytePos) -> ast::Name {
+    /// Creates a Symbol from a given offset to the current offset.
+    fn symbol_from(&self, start: BytePos) -> Symbol {
         debug!("taking an ident from {:?} to {:?}", start, self.pos);
         Symbol::intern(self.str_from(start))
     }
 
-    /// As name_from, with an explicit endpoint.
-    fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
+    /// As symbol_from, with an explicit endpoint.
+    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
         debug!("taking an ident from {:?} to {:?}", start, end);
         Symbol::intern(self.str_from_to(start, end))
     }
@@ -440,7 +439,7 @@ impl<'a> StringReader<'a> {
     }
 
     /// Eats <XID_start><XID_continue>*, if possible.
-    fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
+    fn scan_optional_raw_name(&mut self) -> Option<Symbol> {
         if !ident_start(self.ch) {
             return None;
         }
@@ -508,7 +507,7 @@ impl<'a> StringReader<'a> {
                     }
 
                     let kind = if doc_comment {
-                        token::DocComment(self.name_from(start_bpos))
+                        token::DocComment(self.symbol_from(start_bpos))
                     } else {
                         token::Comment
                     };
@@ -537,7 +536,7 @@ impl<'a> StringReader<'a> {
                         self.bump();
                     }
                     return Some(Token::new(
-                        token::Shebang(self.name_from(start)),
+                        token::Shebang(self.symbol_from(start)),
                         self.mk_sp(start, self.pos),
                     ));
                 }
@@ -719,17 +718,17 @@ impl<'a> StringReader<'a> {
             let pos = self.pos;
             self.check_float_base(start_bpos, pos, base);
 
-            (token::Float, self.name_from(start_bpos))
+            (token::Float, self.symbol_from(start_bpos))
         } else {
             // it might be a float if it has an exponent
             if self.ch_is('e') || self.ch_is('E') {
                 self.scan_float_exponent();
                 let pos = self.pos;
                 self.check_float_base(start_bpos, pos, base);
-                return (token::Float, self.name_from(start_bpos));
+                return (token::Float, self.symbol_from(start_bpos));
             }
             // but we certainly have an integer!
-            (token::Integer, self.name_from(start_bpos))
+            (token::Integer, self.symbol_from(start_bpos))
         }
     }
 
@@ -831,7 +830,7 @@ impl<'a> StringReader<'a> {
                 }
 
                 // FIXME: perform NFKC normalization here. (Issue #2253)
-                let name = self.name_from(start);
+                let name = self.symbol_from(start);
                 if is_raw_ident {
                     let span = self.mk_sp(raw_start, self.pos);
                     if !name.can_be_raw() {
@@ -1006,7 +1005,7 @@ impl<'a> StringReader<'a> {
                     // lifetimes shouldn't end with a single quote
                     // if we find one, then this is an invalid character literal
                     if self.ch_is('\'') {
-                        let symbol = self.name_from(start);
+                        let symbol = self.symbol_from(start);
                         self.bump();
                         self.validate_char_escape(start_with_quote);
                         return Ok(TokenKind::lit(token::Char, symbol, None));
@@ -1024,7 +1023,7 @@ impl<'a> StringReader<'a> {
                     // Include the leading `'` in the real identifier, for macro
                     // expansion purposes. See #12512 for the gory details of why
                     // this is necessary.
-                    return Ok(token::Lifetime(self.name_from(start_with_quote)));
+                    return Ok(token::Lifetime(self.symbol_from(start_with_quote)));
                 }
                 let msg = "unterminated character literal";
                 let symbol = self.scan_single_quoted_string(start_with_quote, msg);
@@ -1052,7 +1051,7 @@ impl<'a> StringReader<'a> {
                     },
                     Some('r') => {
                         let (start, end, hash_count) = self.scan_raw_string();
-                        let symbol = self.name_from_to(start, end);
+                        let symbol = self.symbol_from_to(start, end);
                         self.validate_raw_byte_str_escape(start, end);
 
                         (token::ByteStrRaw(hash_count), symbol)
@@ -1073,7 +1072,7 @@ impl<'a> StringReader<'a> {
             }
             'r' => {
                 let (start, end, hash_count) = self.scan_raw_string();
-                let symbol = self.name_from_to(start, end);
+                let symbol = self.symbol_from_to(start, end);
                 self.validate_raw_str_escape(start, end);
                 let suffix = self.scan_optional_raw_name();
 
@@ -1174,7 +1173,7 @@ impl<'a> StringReader<'a> {
 
     fn scan_single_quoted_string(&mut self,
                                  start_with_quote: BytePos,
-                                 unterminated_msg: &str) -> ast::Name {
+                                 unterminated_msg: &str) -> Symbol {
         // assumes that first `'` is consumed
         let start = self.pos;
         // lex `'''` as a single char, for recovery
@@ -1206,12 +1205,12 @@ impl<'a> StringReader<'a> {
             }
         }
 
-        let id = self.name_from(start);
+        let id = self.symbol_from(start);
         self.bump();
         id
     }
 
-    fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name {
+    fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> Symbol {
         debug_assert!(self.ch_is('\"'));
         let start_with_quote = self.pos;
         self.bump();
@@ -1226,7 +1225,7 @@ impl<'a> StringReader<'a> {
             }
             self.bump();
         }
-        let id = self.name_from(start);
+        let id = self.symbol_from(start);
         self.bump();
         id
     }