From 1baf3ff7e98ea14e84159e213bd9872efd53e07f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 29 Aug 2020 14:29:23 +0200 Subject: [PATCH 1/2] Avoid StringReader when checking code blocks for syntax errors `parse_stream_from_source_str` is a more stable API to convert a string into a bunch of tokens, and it also catches errors about mismatched parenthesis. --- .../passes/check_code_block_syntax.rs | 75 ++++++++----------- 1 file changed, 32 insertions(+), 43 deletions(-) diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs index d1f2c12ccd630..beb1f13ca6f75 100644 --- a/src/librustdoc/passes/check_code_block_syntax.rs +++ b/src/librustdoc/passes/check_code_block_syntax.rs @@ -1,7 +1,6 @@ -use rustc_ast::token; use rustc_data_structures::sync::{Lock, Lrc}; use rustc_errors::{emitter::Emitter, Applicability, Diagnostic, Handler}; -use rustc_parse::lexer::StringReader as Lexer; +use rustc_parse::parse_stream_from_source_str; use rustc_session::parse::ParseSess; use rustc_span::source_map::{FilePathMapping, SourceMap}; use rustc_span::{FileName, InnerSpan}; @@ -28,49 +27,34 @@ struct SyntaxChecker<'a, 'tcx> { impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> { fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) { - let buffered_messages = Lrc::new(Lock::new(vec![])); - - let emitter = BufferEmitter { messages: Lrc::clone(&buffered_messages) }; + let buffer = Lrc::new(Lock::new(Buffer::default())); + let emitter = BufferEmitter { buffer: Lrc::clone(&buffer) }; let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); let handler = Handler::with_emitter(false, None, Box::new(emitter)); + let source = dox[code_block.code].to_owned(); let sess = ParseSess::with_span_handler(handler, sm); - let source_file = sess.source_map().new_source_file( - FileName::Custom(String::from("doctest")), - dox[code_block.code].to_owned(), - ); - - let validation_status = 
rustc_driver::catch_fatal_errors(|| { - let mut has_syntax_errors = false; - let mut only_whitespace = true; - // even if there is a syntax error, we need to run the lexer over the whole file - let mut lexer = Lexer::new(&sess, source_file, None); - loop { - match lexer.next_token().kind { - token::Eof => break, - token::Whitespace => (), - token::Unknown(..) => has_syntax_errors = true, - _ => only_whitespace = false, - } - } - if has_syntax_errors { - Some(CodeBlockInvalid::SyntaxError) - } else if only_whitespace { - Some(CodeBlockInvalid::Empty) - } else { - None - } + let is_empty = rustc_driver::catch_fatal_errors(|| { + parse_stream_from_source_str( + FileName::Custom(String::from("doctest")), + source, + &sess, + None, + ) + .is_empty() }) - .unwrap_or(Some(CodeBlockInvalid::SyntaxError)); + .unwrap_or(false); + let buffer = buffer.borrow(); - if let Some(code_block_invalid) = validation_status { + if buffer.has_errors || is_empty { let mut diag = if let Some(sp) = super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs) { - let warning_message = match code_block_invalid { - CodeBlockInvalid::SyntaxError => "could not parse code block as Rust code", - CodeBlockInvalid::Empty => "Rust code block is empty", + let warning_message = if buffer.has_errors { + "could not parse code block as Rust code" + } else { + "Rust code block is empty" }; let mut diag = self.cx.sess().struct_span_warn(sp, warning_message); @@ -102,7 +86,7 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> { }; // FIXME(#67563): Provide more context for these errors by displaying the spans inline. 
- for message in buffered_messages.borrow().iter() { + for message in buffer.messages.iter() { diag.note(&message); } @@ -125,21 +109,26 @@ impl<'a, 'tcx> DocFolder for SyntaxChecker<'a, 'tcx> { } } +#[derive(Default)] +struct Buffer { + messages: Vec<String>, + has_errors: bool, +} + struct BufferEmitter { - messages: Lrc<Lock<Vec<String>>>, + buffer: Lrc<Lock<Buffer>>, } impl Emitter for BufferEmitter { fn emit_diagnostic(&mut self, diag: &Diagnostic) { - self.messages.borrow_mut().push(format!("error from rustc: {}", diag.message[0].0)); + let mut buffer = self.buffer.borrow_mut(); + buffer.messages.push(format!("error from rustc: {}", diag.message[0].0)); + if diag.is_error() { + buffer.has_errors = true; + } } fn source_map(&self) -> Option<&Lrc<SourceMap>> { None } } - -enum CodeBlockInvalid { - SyntaxError, - Empty, -} From 30ce15f1fa763904835c5b3df155964668937683 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 31 Aug 2020 18:01:01 +0200 Subject: [PATCH 2/2] Make StringReader private After the recent refactorings, we can actually completely hide this type. It should help with #63689. --- compiler/rustc_parse/src/lexer/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c4ef35bc30c70..1131f00cb425e 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -27,7 +27,7 @@ pub struct UnmatchedBrace { pub candidate_span: Option<Span>, } -pub struct StringReader<'a> { +crate struct StringReader<'a> { sess: &'a ParseSess, /// Initial position, read-only. start_pos: BytePos, @@ -41,7 +41,7 @@ pub struct StringReader<'a> { } impl<'a> StringReader<'a> { - pub fn new( + crate fn new( sess: &'a ParseSess, source_file: Lrc<rustc_span::SourceFile>, override_span: Option<Span>, @@ -66,7 +66,7 @@ impl<'a> StringReader<'a> { } /// Returns the next token, including trivia like whitespace or comments. 
- pub fn next_token(&mut self) -> Token { + fn next_token(&mut self) -> Token { let start_src_index = self.src_index(self.pos); let text: &str = &self.src[start_src_index..self.end_src_index];