Rollup merge of rust-lang#63017 - matklad:no-fatal, r=petrochenkov
Remove special code path for handling unknown tokens

In `StringReader`, we have a buffer of fatal errors, which is used only in a single case: when we see something which is not a reasonable token at all, like `🦀`. I think a more straightforward thing to do here is to produce an explicit error token in this case and let the next layer (the parser) deal with it.

However, currently this leads to duplicated error messages. What should we do about this? Naively, I would think that emitting (just emitting, not raising) a `FatalError` should suppress other errors, but it looks like this is not the case? We could also tweak the parser on a case-by-case basis, to avoid emitting "expected" errors when the current token is an `Err`. I am personally also fine with cascading errors in this case: it's quite unlikely that you actually type a fully invalid token.
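
To illustrate the consumer-side pattern this enables, here is a minimal sketch (illustrative only: `lexer`, `report_invalid_token`, and the loop body are hypothetical, not code from this PR):

```rust
// Sketch: callers loop over `next_token()` and handle `Unknown` themselves,
// instead of the lexer buffering fatal errors internally.
loop {
    let token = lexer.next_token();
    match token.kind {
        token::Eof => break,
        // `Unknown` carries the invalid source text as an interned symbol;
        // the caller decides whether to report it, skip it, or bail out.
        token::Unknown(sym) => report_invalid_token(sym, token.span),
        _ => { /* classify, parse, or forward the token as usual */ }
    }
}
```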

@petrochenkov, which approach should we take to fight cascading errors?
Centril authored Aug 6, 2019
2 parents 1166e2a + b3e8c8b commit ec1758d
Showing 13 changed files with 223 additions and 116 deletions.
3 changes: 2 additions & 1 deletion src/librustc/ich/impls_syntax.rs
@@ -363,7 +363,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for token::TokenKind {
             }

             token::DocComment(val) |
-            token::Shebang(val) => val.hash_stable(hcx, hasher),
+            token::Shebang(val) |
+            token::Unknown(val) => val.hash_stable(hcx, hasher),
         }
     }
 }
29 changes: 15 additions & 14 deletions src/librustdoc/html/highlight.rs
@@ -44,7 +44,7 @@ pub fn render_with_highlighting(

     let mut highlighted_source = vec![];
     if classifier.write_source(&mut highlighted_source).is_err() {
-        Err(classifier.lexer.buffer_fatal_errors())
+        Err(())
     } else {
         Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
     }
@@ -59,14 +59,9 @@ pub fn render_with_highlighting(
             }
             write_footer(&mut out).unwrap();
         }
-        Err(errors) => {
-            // If errors are encountered while trying to highlight, cancel the errors and just emit
-            // the unhighlighted source. The errors will have already been reported in the
-            // `check-code-block-syntax` pass.
-            for mut error in errors {
-                error.cancel();
-            }
-
+        Err(()) => {
+            // If errors are encountered while trying to highlight, just emit
+            // the unhighlighted source.
             write!(out, "<pre><code>{}</code></pre>", src).unwrap();
         }
     }
@@ -192,14 +187,20 @@ impl<'a> Classifier<'a> {
         if let Some(token) = self.peek_token.take() {
             return Ok(token);
         }
-        self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
+        let token = self.lexer.next_token();
+        if let token::Unknown(..) = &token.kind {
+            return Err(HighlightError::LexError);
+        }
+        Ok(token)
     }

     fn peek(&mut self) -> Result<&Token, HighlightError> {
         if self.peek_token.is_none() {
-            self.peek_token = Some(
-                self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
-            );
+            let token = self.lexer.next_token();
+            if let token::Unknown(..) = &token.kind {
+                return Err(HighlightError::LexError);
+            }
+            self.peek_token = Some(token);
         }
         Ok(self.peek_token.as_ref().unwrap())
     }
@@ -237,7 +238,7 @@ impl<'a> Classifier<'a> {
                 return Ok(());
             },

-            token::Whitespace => Class::None,
+            token::Whitespace | token::Unknown(..) => Class::None,
             token::Comment => Class::Comment,
             token::DocComment(..) => Class::DocComment,
32 changes: 9 additions & 23 deletions src/librustdoc/passes/check_code_block_syntax.rs
@@ -32,24 +32,20 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             dox[code_block.code].to_owned(),
         );

-        let errors = {
+        let has_errors = {
+            let mut has_errors = false;
             let mut lexer = Lexer::new(&sess, source_file, None);
-            while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
-                if kind == token::Eof {
-                    break;
+            loop {
+                match lexer.next_token().kind {
+                    token::Eof => break,
+                    token::Unknown(..) => has_errors = true,
+                    _ => (),
                 }
             }
-
-            let errors = lexer.buffer_fatal_errors();
-
-            if !errors.is_empty() {
-                Err(errors)
-            } else {
-                Ok(())
-            }
+            has_errors
         };

-        if let Err(errors) = errors {
+        if has_errors {
             let mut diag = if let Some(sp) =
                 super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
             {
@@ -58,11 +54,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
                     .sess()
                     .struct_span_warn(sp, "could not parse code block as Rust code");

-                for mut err in errors {
-                    diag.note(&format!("error from rustc: {}", err.message()));
-                    err.cancel();
-                }
-
                 if code_block.syntax.is_none() && code_block.is_fenced {
                     let sp = sp.from_inner(InnerSpan::new(0, 3));
                     diag.span_suggestion(
@@ -82,11 +73,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
                     "doc comment contains an invalid Rust code block",
                 );

-                for mut err in errors {
-                    // Don't bother reporting the error, because we can't show where it happened.
-                    err.cancel();
-                }
-
                 if code_block.syntax.is_none() && code_block.is_fenced {
                     diag.help("mark blocks that do not contain Rust code as text: ```text");
                 }
2 changes: 1 addition & 1 deletion src/libsyntax/ext/proc_macro_server.rs
@@ -184,7 +184,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
             }

             OpenDelim(..) | CloseDelim(..) => unreachable!(),
-            Whitespace | Comment | Shebang(..) | Eof => unreachable!(),
+            Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => unreachable!(),
         }
     }
 }
73 changes: 13 additions & 60 deletions src/libsyntax/parse/lexer/mod.rs
@@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind};
 use crate::symbol::{sym, Symbol};
 use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

-use errors::{FatalError, Diagnostic, DiagnosticBuilder};
+use errors::{FatalError, DiagnosticBuilder};
 use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
 use rustc_lexer::Base;
 use rustc_lexer::unescape;
@@ -39,7 +39,6 @@ pub struct StringReader<'a> {
     pos: BytePos,
     /// Stop reading src at this index.
     end_src_index: usize,
-    fatal_errs: Vec<DiagnosticBuilder<'a>>,
     /// Source text to tokenize.
     src: Lrc<String>,
     override_span: Option<Span>,
@@ -62,7 +61,6 @@ impl<'a> StringReader<'a> {
             pos: source_file.start_pos,
             end_src_index: src.len(),
             src,
-            fatal_errs: Vec::new(),
             override_span,
         }
     }
@@ -89,29 +87,17 @@ impl<'a> StringReader<'a> {
         self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }

-    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
-        match res {
-            Ok(tok) => tok,
-            Err(_) => {
-                self.emit_fatal_errors();
-                FatalError.raise();
-            }
-        }
-    }
-
     /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// `Err(())` means that some errors were encountered, which can be
-    /// retrieved using `buffer_fatal_errors`.
-    pub fn try_next_token(&mut self) -> Result<Token, ()> {
-        assert!(self.fatal_errs.is_empty());
-
+    pub fn next_token(&mut self) -> Token {
         let start_src_index = self.src_index(self.pos);
         let text: &str = &self.src[start_src_index..self.end_src_index];

         if text.is_empty() {
             let span = self.mk_sp(self.pos, self.pos);
-            return Ok(Token::new(token::Eof, span));
+            return Token::new(token::Eof, span);
         }

         {
@@ -125,7 +111,7 @@ impl<'a> StringReader<'a> {
                 let kind = token::Shebang(sym);

                 let span = self.mk_sp(start, self.pos);
-                return Ok(Token::new(kind, span));
+                return Token::new(kind, span);
             }
         }
     }
@@ -139,39 +125,10 @@ impl<'a> StringReader<'a> {

         // This could use `?`, but that makes code significantly (10-20%) slower.
         // https://github.com/rust-lang/rust/issues/37939
-        let kind = match self.cook_lexer_token(token.kind, start) {
-            Ok(it) => it,
-            Err(err) => return Err(self.fatal_errs.push(err)),
-        };
+        let kind = self.cook_lexer_token(token.kind, start);

         let span = self.mk_sp(start, self.pos);
-        Ok(Token::new(kind, span))
-    }
-
-    /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// Aborts in case of an error.
-    pub fn next_token(&mut self) -> Token {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
-    fn emit_fatal_errors(&mut self) {
-        for err in &mut self.fatal_errs {
-            err.emit();
-        }
-
-        self.fatal_errs.clear();
-    }
-
-    pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
-        let mut buffer = Vec::new();
-
-        for err in self.fatal_errs.drain(..) {
-            err.buffer(&mut buffer);
-        }
-
-        buffer
+        Token::new(kind, span)
     }

     /// Report a fatal lexical error with a given span.
@@ -218,8 +175,8 @@ impl<'a> StringReader<'a> {
         &self,
         token: rustc_lexer::TokenKind,
         start: BytePos,
-    ) -> Result<TokenKind, DiagnosticBuilder<'a>> {
-        let kind = match token {
+    ) -> TokenKind {
+        match token {
             rustc_lexer::TokenKind::LineComment => {
                 let string = self.str_from(start);
                 // comments with only more "/"s are not doc comments
@@ -396,16 +353,12 @@ impl<'a> StringReader<'a> {
                 // this should be inside `rustc_lexer`. However, we should first remove compound
                 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
                 // as there will be less overall work to do this way.
-                return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
-                    Some(token) => {
-                        err.emit();
-                        Ok(token)
-                    }
-                    None => Err(err),
-                }
+                let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
+                    .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
+                err.emit();
+                token
             }
-        };
-        Ok(kind)
+        }
     }

     fn cook_lexer_literal(
2 changes: 1 addition & 1 deletion src/libsyntax/parse/lexer/tokentrees.rs
@@ -217,7 +217,7 @@ impl<'a> TokenTreesReader<'a> {
         loop {
             let token = self.string_reader.next_token();
             match token.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
+                token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
                     self.joint_to_prev = NonJoint;
                 }
                 _ => {
4 changes: 3 additions & 1 deletion src/libsyntax/parse/token.rs
@@ -255,6 +255,8 @@ pub enum TokenKind {
     /// A comment.
     Comment,
     Shebang(ast::Name),
+    /// A completely invalid token which should be skipped.
+    Unknown(ast::Name),

     Eof,
 }
@@ -603,7 +605,7 @@ impl Token {
             DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
             Question | OpenDelim(..) | CloseDelim(..) |
             Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
-            Whitespace | Comment | Shebang(..) | Eof => return None,
+            Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None,
         };

         Some(Token::new(kind, self.span.to(joint.span)))
1 change: 1 addition & 0 deletions src/libsyntax/print/pprust.rs
@@ -288,6 +288,7 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>)
         token::Whitespace => " ".to_string(),
         token::Comment => "/* */".to_string(),
         token::Shebang(s) => format!("/* shebang: {}*/", s),
+        token::Unknown(s) => s.to_string(),

         token::Interpolated(ref nt) => nonterminal_to_string(nt),
     }
(Diffs for the remaining 5 changed files are not shown.)
