From 8c93798e9f80d6d6ed9a2ab4b01af06a8fea23b7 Mon Sep 17 00:00:00 2001 From: Andy Russell Date: Sat, 15 Dec 2018 16:25:50 -0500 Subject: [PATCH] rustdoc: check code block syntax in early pass --- src/librustdoc/html/highlight.rs | 92 +++++++++------ src/librustdoc/html/markdown.rs | 109 ++++++++++++++++++ src/librustdoc/lib.rs | 1 + .../passes/check_code_block_syntax.rs | 109 ++++++++++++++++++ .../passes/collect_intra_doc_links.rs | 12 +- src/librustdoc/passes/mod.rs | 20 +++- src/libsyntax/parse/lexer/mod.rs | 13 --- src/test/rustdoc-ui/invalid-syntax.rs | 61 +++++++++- src/test/rustdoc-ui/invalid-syntax.stderr | 105 +++++++++++++++-- src/test/rustdoc/bad-codeblock-syntax.rs | 27 +++++ 10 files changed, 476 insertions(+), 73 deletions(-) create mode 100644 src/librustdoc/passes/check_code_block_syntax.rs create mode 100644 src/test/rustdoc/bad-codeblock-syntax.rs diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 87e979b93e9ef..29a41384edcda 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -25,40 +25,51 @@ pub fn render_with_highlighting( tooltip: Option<(&str, &str)>, ) -> String { debug!("highlighting: ================\n{}\n==============", src); - let sess = parse::ParseSess::new(FilePathMapping::empty()); - let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()), - src.to_string()); - let mut out = Vec::new(); if let Some((tooltip, class)) = tooltip { write!(out, "
<div class='information'><div class='tooltip {}'>ⓘ<span class='tooltiptext'>{}</span></div></div>
", class, tooltip).unwrap(); } - write_header(class, &mut out).unwrap(); - - let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") { - Ok(l) => l, - Err(_) => { - let first_line = src.lines().next().unwrap_or_else(|| ""); - let mut err = sess.span_diagnostic - .struct_warn(&format!("Invalid doc comment starting with: `{}`\n\ - (Ignoring this codeblock)", - first_line)); - err.emit(); - return String::new(); + + let sess = parse::ParseSess::new(FilePathMapping::empty()); + let fm = sess.source_map().new_source_file( + FileName::Custom(String::from("rustdoc-highlighting")), + src.to_owned(), + ); + let highlight_result = + lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| { + let mut classifier = Classifier::new(lexer, sess.source_map()); + + let mut highlighted_source = vec![]; + if classifier.write_source(&mut highlighted_source).is_err() { + Err(classifier.lexer.buffer_fatal_errors()) + } else { + Ok(String::from_utf8_lossy(&highlighted_source).into_owned()) + } + }); + + match highlight_result { + Ok(highlighted_source) => { + write_header(class, &mut out).unwrap(); + write!(out, "{}", highlighted_source).unwrap(); + if let Some(extension) = extension { + write!(out, "{}", extension).unwrap(); + } + write_footer(&mut out).unwrap(); } - }; - let mut classifier = Classifier::new(lexer, sess.source_map()); - if classifier.write_source(&mut out).is_err() { - classifier.lexer.emit_fatal_errors(); - return format!("
<pre><code>{}</code></pre>
", src); - } + Err(errors) => { + // If errors are encountered while trying to highlight, cancel the errors and just emit + // the unhighlighted source. The errors will have already been reported in the + // `check-code-block-syntax` pass. + for mut error in errors { + error.cancel(); + } - if let Some(extension) = extension { - write!(out, "{}", extension).unwrap(); + write!(out, "
<pre><code>{}</code></pre>
", src).unwrap(); + } } - write_footer(&mut out).unwrap(); + String::from_utf8_lossy(&out[..]).into_owned() } @@ -151,6 +162,17 @@ impl Writer for U { } } +enum HighlightError { + LexError, + IoError(io::Error), +} + +impl From for HighlightError { + fn from(err: io::Error) -> Self { + HighlightError::IoError(err) + } +} + impl<'a> Classifier<'a> { fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> { Classifier { @@ -162,17 +184,11 @@ impl<'a> Classifier<'a> { } } - /// Gets the next token out of the lexer, emitting fatal errors if lexing fails. - fn try_next_token(&mut self) -> io::Result { + /// Gets the next token out of the lexer. + fn try_next_token(&mut self) -> Result { match self.lexer.try_next_token() { Ok(tas) => Ok(tas), - Err(_) => { - let mut err = self.lexer.sess.span_diagnostic - .struct_warn("Backing out of syntax highlighting"); - err.note("You probably did not intend to render this as a rust code-block"); - err.emit(); - Err(io::Error::new(io::ErrorKind::Other, "")) - } + Err(_) => Err(HighlightError::LexError), } } @@ -185,7 +201,7 @@ impl<'a> Classifier<'a> { /// source. fn write_source(&mut self, out: &mut W) - -> io::Result<()> { + -> Result<(), HighlightError> { loop { let next = self.try_next_token()?; if next.tok == token::Eof { @@ -202,7 +218,7 @@ impl<'a> Classifier<'a> { fn write_token(&mut self, out: &mut W, tas: TokenAndSpan) - -> io::Result<()> { + -> Result<(), HighlightError> { let klass = match tas.tok { token::Shebang(s) => { out.string(Escape(&s.as_str()), Class::None)?; @@ -341,7 +357,9 @@ impl<'a> Classifier<'a> { // Anything that didn't return above is the simple case where we the // class just spans a single token, so we can use the `string` method. - out.string(Escape(&self.snip(tas.sp)), klass) + out.string(Escape(&self.snip(tas.sp)), klass)?; + + Ok(()) } // Helper function to get a snippet from the source_map. diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 05a9a2d1312ae..6b7f54044ca1d 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option>)> { links } +#[derive(Debug)] +crate struct RustCodeBlock { + /// The range in the markdown that the code block occupies. Note that this includes the fences + /// for fenced code blocks. + pub range: Range, + /// The range in the markdown that the code within the code block occupies. + pub code: Range, + pub is_fenced: bool, + pub syntax: Option, +} + +/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or +/// untagged (and assumed to be rust). 
+crate fn rust_code_blocks(md: &str) -> Vec { + let mut code_blocks = vec![]; + + if md.is_empty() { + return code_blocks; + } + + let mut opts = Options::empty(); + opts.insert(OPTION_ENABLE_TABLES); + opts.insert(OPTION_ENABLE_FOOTNOTES); + let mut p = Parser::new_ext(md, opts); + + let mut code_block_start = 0; + let mut code_start = 0; + let mut is_fenced = false; + let mut previous_offset = 0; + let mut in_rust_code_block = false; + while let Some(event) = p.next() { + let offset = p.get_offset(); + + match event { + Event::Start(Tag::CodeBlock(syntax)) => { + let lang_string = if syntax.is_empty() { + LangString::all_false() + } else { + LangString::parse(&*syntax, ErrorCodes::Yes) + }; + + if lang_string.rust { + in_rust_code_block = true; + + code_start = offset; + code_block_start = match md[previous_offset..offset].find("```") { + Some(fence_idx) => { + is_fenced = true; + previous_offset + fence_idx + } + None => offset, + }; + } + } + Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => { + in_rust_code_block = false; + + let code_block_end = if is_fenced { + let fence_str = &md[previous_offset..offset] + .chars() + .rev() + .collect::(); + fence_str + .find("```") + .map(|fence_idx| offset - fence_idx) + .unwrap_or_else(|| offset) + } else if md + .as_bytes() + .get(offset) + .map(|b| *b == b'\n') + .unwrap_or_default() + { + offset - 1 + } else { + offset + }; + + let code_end = if is_fenced { + previous_offset + } else { + code_block_end + }; + + code_blocks.push(RustCodeBlock { + is_fenced, + range: Range { + start: code_block_start, + end: code_block_end, + }, + code: Range { + start: code_start, + end: code_end, + }, + syntax: if !syntax.is_empty() { + Some(syntax.into_owned()) + } else { + None + }, + }); + } + _ => (), + } + + previous_offset = offset; + } + + code_blocks +} + #[derive(Clone, Default, Debug)] pub struct IdMap { map: FxHashMap, diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 1b6d7e87192d6..6e7141f94a776 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -3,6 +3,7 @@ html_root_url = "https://doc.rust-lang.org/nightly/", html_playground_url = "https://play.rust-lang.org/")] +#![feature(bind_by_move_pattern_guards)] #![feature(rustc_private)] #![feature(box_patterns)] #![feature(box_syntax)] diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs new file mode 100644 index 0000000000000..a013cc36c722d --- /dev/null +++ b/src/librustdoc/passes/check_code_block_syntax.rs @@ -0,0 +1,109 @@ +use errors::Applicability; +use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer}; +use syntax::parse::{ParseSess, token}; +use syntax::source_map::FilePathMapping; +use syntax_pos::FileName; + +use clean; +use core::DocContext; +use fold::DocFolder; +use html::markdown::{self, RustCodeBlock}; +use passes::Pass; + +pub const CHECK_CODE_BLOCK_SYNTAX: Pass = + Pass::early("check-code-block-syntax", check_code_block_syntax, + "validates syntax inside Rust code blocks"); + +pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate { + SyntaxChecker { cx }.fold_crate(krate) +} + +struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> { + cx: &'a DocContext<'a, 'tcx, 'rcx>, +} + +impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> { + fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) { + let sess = ParseSess::new(FilePathMapping::empty()); + let source_file = sess.source_map().new_source_file( + 
FileName::Custom(String::from("doctest")), + dox[code_block.code].to_owned(), + ); + + let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| { + while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() { + if tok == token::Eof { + break; + } + } + + let errors = lexer.buffer_fatal_errors(); + + if !errors.is_empty() { + Err(errors) + } else { + Ok(()) + } + }); + + if let Err(errors) = errors { + let mut diag = if let Some(sp) = + super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs) + { + let mut diag = self + .cx + .sess() + .struct_span_warn(sp, "could not parse code block as Rust code"); + + for mut err in errors { + diag.note(&format!("error from rustc: {}", err.message())); + err.cancel(); + } + + if code_block.syntax.is_none() && code_block.is_fenced { + let sp = sp.from_inner_byte_pos(0, 3); + diag.span_suggestion_with_applicability( + sp, + "mark blocks that do not contain Rust code as text", + String::from("```text"), + Applicability::MachineApplicable, + ); + } + + diag + } else { + // We couldn't calculate the span of the markdown block that had the error, so our + // diagnostics are going to be a bit lacking. + let mut diag = self.cx.sess().struct_span_warn( + super::span_of_attrs(&item.attrs), + "doc comment contains an invalid Rust code block", + ); + + for mut err in errors { + // Don't bother reporting the error, because we can't show where it happened. + err.cancel(); + } + + if code_block.syntax.is_none() && code_block.is_fenced { + diag.help("mark blocks that do not contain Rust code as text: ```text"); + } + + diag + }; + + diag.emit(); + } + } +} + +impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> { + fn fold_item(&mut self, item: clean::Item) -> Option { + if let Some(dox) = &item.attrs.collapsed_doc_value() { + for code_block in markdown::rust_code_blocks(&dox) { + self.check_rust_syntax(&item, &dox, code_block); + } + } + + self.fold_item_recur(item) + } +} diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index 56d6671266065..3d6096b07ce43 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -6,7 +6,7 @@ use syntax; use syntax::ast::{self, Ident, NodeId}; use syntax::feature_gate::UnstableFeatures; use syntax::symbol::Symbol; -use syntax_pos::{self, DUMMY_SP}; +use syntax_pos::DUMMY_SP; use std::ops::Range; @@ -16,6 +16,7 @@ use html::markdown::markdown_links; use clean::*; use passes::{look_for_tests, Pass}; +use super::span_of_attrs; pub const COLLECT_INTRA_DOC_LINKS: Pass = Pass::early("collect-intra-doc-links", collect_intra_doc_links, @@ -440,15 +441,6 @@ fn macro_resolve(cx: &DocContext, path_str: &str) -> Option { None } -pub fn span_of_attrs(attrs: &Attributes) -> syntax_pos::Span { - if attrs.doc_strings.is_empty() { - return DUMMY_SP; - } - let start = attrs.doc_strings[0].span(); - let end = attrs.doc_strings.last().expect("No doc strings provided").span(); - start.to(end) -} - /// Reports a resolution failure diagnostic. 
/// /// If we cannot find the exact source span of the resolution failure, we use the span of the diff --git a/src/librustdoc/passes/mod.rs b/src/librustdoc/passes/mod.rs index 23581d0051166..c9a3a2c003fe0 100644 --- a/src/librustdoc/passes/mod.rs +++ b/src/librustdoc/passes/mod.rs @@ -8,7 +8,7 @@ use rustc::util::nodemap::DefIdSet; use std::mem; use std::fmt; use syntax::ast::NodeId; -use syntax_pos::Span; +use syntax_pos::{DUMMY_SP, Span}; use std::ops::Range; use clean::{self, GetDefId, Item}; @@ -18,8 +18,6 @@ use fold::StripItem; use html::markdown::{find_testable_code, ErrorCodes, LangString}; -use self::collect_intra_doc_links::span_of_attrs; - mod collapse_docs; pub use self::collapse_docs::COLLAPSE_DOCS; @@ -47,6 +45,9 @@ pub use self::private_items_doc_tests::CHECK_PRIVATE_ITEMS_DOC_TESTS; mod collect_trait_impls; pub use self::collect_trait_impls::COLLECT_TRAIT_IMPLS; +mod check_code_block_syntax; +pub use self::check_code_block_syntax::CHECK_CODE_BLOCK_SYNTAX; + /// Represents a single pass. #[derive(Copy, Clone)] pub enum Pass { @@ -137,6 +138,7 @@ pub const PASSES: &'static [Pass] = &[ STRIP_PRIV_IMPORTS, PROPAGATE_DOC_CFG, COLLECT_INTRA_DOC_LINKS, + CHECK_CODE_BLOCK_SYNTAX, COLLECT_TRAIT_IMPLS, ]; @@ -147,6 +149,7 @@ pub const DEFAULT_PASSES: &'static [&'static str] = &[ "strip-hidden", "strip-private", "collect-intra-doc-links", + "check-code-block-syntax", "collapse-docs", "unindent-comments", "propagate-doc-cfg", @@ -158,6 +161,7 @@ pub const DEFAULT_PRIVATE_PASSES: &'static [&'static str] = &[ "check-private-items-doc-tests", "strip-priv-imports", "collect-intra-doc-links", + "check-code-block-syntax", "collapse-docs", "unindent-comments", "propagate-doc-cfg", @@ -399,6 +403,16 @@ pub fn look_for_tests<'a, 'tcx: 'a, 'rcx: 'a>( } } +/// Return a span encompassing all the given attributes. +crate fn span_of_attrs(attrs: &clean::Attributes) -> Span { + if attrs.doc_strings.is_empty() { + return DUMMY_SP; + } + let start = attrs.doc_strings[0].span(); + let end = attrs.doc_strings.last().expect("No doc strings provided").span(); + start.to(end) +} + /// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code. /// /// This method will return `None` if we cannot construct a span from the source map or if the diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index ecb34e43c590c..53650bd55aea3 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -238,19 +238,6 @@ impl<'a> StringReader<'a> { sr } - pub fn new_without_err(sess: &'a ParseSess, - source_file: Lrc, - override_span: Option, - prepend_error_text: &str) -> Result { - let mut sr = StringReader::new_raw(sess, source_file, override_span); - if sr.advance_token().is_err() { - eprintln!("{}", prepend_error_text); - sr.emit_fatal_errors(); - return Err(()); - } - Ok(sr) - } - pub fn new_or_buffered_errs(sess: &'a ParseSess, source_file: Lrc, override_span: Option) -> Result> { diff --git a/src/test/rustdoc-ui/invalid-syntax.rs b/src/test/rustdoc-ui/invalid-syntax.rs index 537816be8d735..924e0386d3191 100644 --- a/src/test/rustdoc-ui/invalid-syntax.rs +++ b/src/test/rustdoc-ui/invalid-syntax.rs @@ -1,7 +1,66 @@ // compile-pass -// compile-flags: --error-format=human /// ``` /// \__________pkt->size___________/ \_result->size_/ \__pkt->size__/ /// ``` pub fn foo() {} + +/// ``` +/// | +/// LL | use foobar::Baz; +/// | ^^^^^^ did you mean `baz::foobar`? 
+/// ``` +pub fn bar() {} + +/// ``` +/// valid +/// ``` +/// +/// ``` +/// \_ +/// ``` +/// +/// ```text +/// "invalid +/// ``` +pub fn valid_and_invalid() {} + +/// This is a normal doc comment, but... +/// +/// There's a code block with bad syntax in it: +/// +/// ```rust +/// \_ +/// ``` +/// +/// Good thing we tested it! +pub fn baz() {} + +/// Indented block start +/// +/// code with bad syntax +/// \_ +/// +/// Indented block end +pub fn quux() {} + +/// Unclosed fence +/// +/// ``` +/// slkdjf +pub fn xyzzy() {} + +/// Indented code that contains a fence +/// +/// ``` +pub fn blah() {} + +/// ```edition2018 +/// \_ +/// ``` +pub fn blargh() {} + +#[doc = "```"] +/// \_ +#[doc = "```"] +pub fn crazy_attrs() {} diff --git a/src/test/rustdoc-ui/invalid-syntax.stderr b/src/test/rustdoc-ui/invalid-syntax.stderr index b5661332e8d0e..10800380a05d3 100644 --- a/src/test/rustdoc-ui/invalid-syntax.stderr +++ b/src/test/rustdoc-ui/invalid-syntax.stderr @@ -1,10 +1,97 @@ -Output from rustc: -error: unknown start of token: / - --> :1:1 - | -1 | /__________pkt->size___________/ /_result->size_/ /__pkt->size__/ - | ^ - -warning: Invalid doc comment starting with: `/__________pkt->size___________/ /_result->size_/ /__pkt->size__/` -(Ignoring this codeblock) +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:3:5 + | +LL | /// ``` + | _____^ +LL | | /// /__________pkt->size___________/ /_result->size_/ /__pkt->size__/ +LL | | /// ``` + | |_______^ + | + = note: error from rustc: unknown start of token: / +help: mark blocks that do not contain Rust code as text + | +LL | /// ```text + | ^^^^^^^ + +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:8:5 + | +LL | /// ``` + | _____^ +LL | | /// | +LL | | /// LL | use foobar::Baz; +LL | | /// | ^^^^^^ did you mean `baz::foobar`? 
+LL | | /// ``` + | |_______^ + | + = note: error from rustc: unknown start of token: ` +help: mark blocks that do not contain Rust code as text + | +LL | /// ```text + | ^^^^^^^ + +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:19:5 + | +LL | /// ``` + | _____^ +LL | | /// /_ +LL | | /// ``` + | |_______^ + | + = note: error from rustc: unknown start of token: / +help: mark blocks that do not contain Rust code as text + | +LL | /// ```text + | ^^^^^^^ + +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:32:5 + | +LL | /// ```rust + | _____^ +LL | | /// /_ +LL | | /// ``` + | |_______^ + | + = note: error from rustc: unknown start of token: / + +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:41:9 + | +LL | /// code with bad syntax + | _________^ +LL | | /// /_ + | |__________^ + | + = note: error from rustc: unknown start of token: / + +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:55:9 + | +LL | /// ``` + | ^^^ + | + = note: error from rustc: unknown start of token: ` + +warning: could not parse code block as Rust code + --> $DIR/invalid-syntax.rs:58:5 + | +LL | /// ```edition2018 + | _____^ +LL | | /// /_ +LL | | /// ``` + | |_______^ + | + = note: error from rustc: unknown start of token: / + +warning: doc comment contains an invalid Rust code block + --> $DIR/invalid-syntax.rs:63:1 + | +LL | / #[doc = "```"] +LL | | /// /_ +LL | | #[doc = "```"] + | |______________^ + | + = help: mark blocks that do not contain Rust code as text: ```text diff --git a/src/test/rustdoc/bad-codeblock-syntax.rs b/src/test/rustdoc/bad-codeblock-syntax.rs new file mode 100644 index 0000000000000..0ab2f68fcdebe --- /dev/null +++ b/src/test/rustdoc/bad-codeblock-syntax.rs @@ -0,0 +1,27 @@ +// @has bad_codeblock_syntax/fn.foo.html +// @has - '//*[@class="docblock"]/pre/code' '\_' +/// ``` +/// \_ +/// ``` +pub fn foo() {} + +// @has bad_codeblock_syntax/fn.bar.html +// @has - '//*[@class="docblock"]/pre/code' '`baz::foobar`' +/// ``` +/// `baz::foobar` +/// ``` +pub fn bar() {} + +// @has bad_codeblock_syntax/fn.quux.html +// @has - '//*[@class="docblock"]/pre/code' '\_' +/// ```rust +/// \_ +/// ``` +pub fn quux() {} + +// @has bad_codeblock_syntax/fn.ok.html +// @has - '//*[@class="docblock"]/pre/code[@class="language-text"]' '\_' +/// ```text +/// \_ +/// ``` +pub fn ok() {}
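
An end-to-end illustration (a sketch for review purposes, not part of the patch; the diagnostic text follows src/test/rustdoc-ui/invalid-syntax.stderr above): a doc comment whose untagged code block fails to lex now triggers a warning from the new `check-code-block-syntax` pass, with a machine-applicable suggestion to mark the block as text, instead of the old lexer warning emitted while highlighting:

    /// ```
    /// \_
    /// ```
    pub fn foo() {}

    // rustdoc now reports, roughly:
    //
    //   warning: could not parse code block as Rust code
    //     = note: error from rustc: unknown start of token: \
    //   help: mark blocks that do not contain Rust code as text
    //
    // The rendered HTML still shows the block verbatim, because
    // `render_with_highlighting` falls back to the unhighlighted source
    // after cancelling the buffered lexer errors.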