From 15fbe618a14ddde520561c4cf1b85d4e4c9005f8 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 29 Jun 2024 09:34:52 -0700 Subject: [PATCH 1/4] rustdoc: update to pulldown-cmark 0.11 --- Cargo.lock | 21 ++-- compiler/rustc_resolve/Cargo.toml | 2 +- compiler/rustc_resolve/src/rustdoc.rs | 14 ++- src/librustdoc/html/markdown.rs | 111 +++++++++--------- src/librustdoc/passes/lint/bare_urls.rs | 4 +- src/librustdoc/passes/lint/html_tags.rs | 6 +- .../passes/lint/redundant_explicit_links.rs | 16 ++- src/tools/tidy/src/deps.rs | 1 + 8 files changed, 94 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 94d70a020a4dc..3af90a252aea7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3123,25 +3123,26 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.9.6" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +checksum = "76979bea66e7875e7509c4ec5300112b316af87fa7a252ca91c448b32dfe3993" dependencies = [ "bitflags 2.5.0", + "getopts", "memchr", + "pulldown-cmark-escape 0.10.1", "unicase", ] [[package]] name = "pulldown-cmark" -version = "0.10.3" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76979bea66e7875e7509c4ec5300112b316af87fa7a252ca91c448b32dfe3993" +checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0" dependencies = [ "bitflags 2.5.0", - "getopts", "memchr", - "pulldown-cmark-escape", + "pulldown-cmark-escape 0.11.0", "unicase", ] @@ -3151,6 +3152,12 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3" +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + [[package]] name = "pulldown-cmark-to-cmark" version = "13.0.0" @@ -4604,7 +4611,7 @@ name = "rustc_resolve" version = "0.0.0" dependencies = [ "bitflags 2.5.0", - "pulldown-cmark 0.9.6", + "pulldown-cmark 0.11.0", "rustc_arena", "rustc_ast", "rustc_ast_pretty", diff --git a/compiler/rustc_resolve/Cargo.toml b/compiler/rustc_resolve/Cargo.toml index b6ae54010c242..b71853b871dc5 100644 --- a/compiler/rustc_resolve/Cargo.toml +++ b/compiler/rustc_resolve/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" -pulldown-cmark = { version = "0.9.6", default-features = false } +pulldown-cmark = { version = "0.11", features = ["html"], default-features = false } rustc_arena = { path = "../rustc_arena" } rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 66b4981eb55ba..594608153211d 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -1,4 +1,6 @@ -use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag}; +use pulldown_cmark::{ + BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag, +}; use rustc_ast as ast; use rustc_ast::util::comments::beautify_doc_string; use rustc_data_structures::fx::FxHashMap; @@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { while let Some(event) = event_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ }) + if may_be_doc_link(link_type) => + { if matches!( link_type, LinkType::Inline @@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { } } - links.push(preprocess_link(&dest)); + links.push(preprocess_link(&dest_url)); } _ => {} } @@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { } /// Collects additional data of link. -fn collect_link_data<'input, 'callback>( - event_iter: &mut Parser<'input, 'callback>, +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + event_iter: &mut Parser<'input, F>, ) -> Option> { let mut display_text: Option = None; let mut append_text = |text: CowStr<'_>| { diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index bae929c64eab2..a7f0df5afa98f 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -54,7 +54,8 @@ use crate::html::render::small_url_encode; use crate::html::toc::TocBuilder; use pulldown_cmark::{ - html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag, + html, BrokenLink, BrokenLinkCallback, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, + Options, Parser, Tag, TagEnd, }; #[cfg(test)] @@ -230,7 +231,7 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { let mut original_text = String::new(); for event in &mut self.inner { match event { - Event::End(Tag::CodeBlock(..)) => break, + Event::End(TagEnd::CodeBlock) => break, Event::Text(ref s) => { original_text.push_str(s); } @@ -359,16 +360,17 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { match &mut event { // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]` // Remove any disambiguator. - Some(Event::Start(Tag::Link( + Some(Event::Start(Tag::Link { // [fn@f] or [fn@f][] - LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, - dest, + link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, + dest_url, title, - ))) => { - debug!("saw start of shortcut link to {dest} with title {title}"); + .. + })) => { + debug!("saw start of shortcut link to {dest_url} with title {title}"); // If this is a shortcut link, it was resolved by the broken_link_callback. // So the URL will already be updated properly. - let link = self.links.iter().find(|&link| *link.href == **dest); + let link = self.links.iter().find(|&link| *link.href == **dest_url); // Since this is an external iterator, we can't replace the inner text just yet. // Store that we saw a link so we know to replace it later. if let Some(link) = link { @@ -381,16 +383,9 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } } // Now that we're done with the shortcut link, don't replace any more text. - Some(Event::End(Tag::Link( - LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, - dest, - _, - ))) => { - debug!("saw end of shortcut link to {dest}"); - if self.links.iter().any(|link| *link.href == **dest) { - assert!(self.shortcut_link.is_some(), "saw closing link without opening tag"); - self.shortcut_link = None; - } + Some(Event::End(TagEnd::Link)) if self.shortcut_link.is_some() => { + debug!("saw end of shortcut link"); + self.shortcut_link = None; } // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link. // [`fn@f`] @@ -433,9 +428,11 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } // If this is a link, but not a shortcut link, // replace the URL, since the broken_link_callback was not called. - Some(Event::Start(Tag::Link(_, dest, title))) => { - if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) { - *dest = CowStr::Borrowed(link.href.as_ref()); + Some(Event::Start(Tag::Link { dest_url, title, .. })) => { + if let Some(link) = + self.links.iter().find(|&link| *link.original_text == **dest_url) + { + *dest_url = CowStr::Borrowed(link.href.as_ref()); if title.is_empty() && !link.tooltip.is_empty() { *title = CowStr::Borrowed(link.tooltip.as_ref()); } @@ -477,9 +474,9 @@ impl<'a, I: Iterator>> Iterator for TableWrapper<'a, I> { self.stored_events.push_back(Event::Start(Tag::Table(t))); Event::Html(CowStr::Borrowed("
")) } - Event::End(Tag::Table(t)) => { + Event::End(TagEnd::Table) => { self.stored_events.push_back(Event::Html(CowStr::Borrowed("
"))); - Event::End(Tag::Table(t)) + Event::End(TagEnd::Table) } e => e, }) @@ -519,11 +516,11 @@ impl<'a, 'b, 'ids, I: Iterator>> Iterator } let event = self.inner.next(); - if let Some((Event::Start(Tag::Heading(level, _, _)), _)) = event { + if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event { let mut id = String::new(); for event in &mut self.inner { match &event.0 { - Event::End(Tag::Heading(..)) => break, + Event::End(TagEnd::Heading(_)) => break, Event::Text(text) | Event::Code(text) => { id.extend(text.chars().filter_map(slugify)); self.buf.push_back(event); @@ -566,27 +563,27 @@ impl<'a, I: Iterator>> SummaryLine<'a, I> { } } -fn check_if_allowed_tag(t: &Tag<'_>) -> bool { +fn check_if_allowed_tag(t: &TagEnd) -> bool { matches!( t, - Tag::Paragraph - | Tag::Emphasis - | Tag::Strong - | Tag::Strikethrough - | Tag::Link(..) - | Tag::BlockQuote + TagEnd::Paragraph + | TagEnd::Emphasis + | TagEnd::Strong + | TagEnd::Strikethrough + | TagEnd::Link + | TagEnd::BlockQuote ) } -fn is_forbidden_tag(t: &Tag<'_>) -> bool { +fn is_forbidden_tag(t: &TagEnd) -> bool { matches!( t, - Tag::CodeBlock(_) - | Tag::Table(_) - | Tag::TableHead - | Tag::TableRow - | Tag::TableCell - | Tag::FootnoteDefinition(_) + TagEnd::CodeBlock + | TagEnd::Table + | TagEnd::TableHead + | TagEnd::TableRow + | TagEnd::TableCell + | TagEnd::FootnoteDefinition ) } @@ -604,12 +601,12 @@ impl<'a, I: Iterator>> Iterator for SummaryLine<'a, I> { let mut is_start = true; let is_allowed_tag = match event { Event::Start(ref c) => { - if is_forbidden_tag(c) { + if is_forbidden_tag(&c.to_end()) { self.skipped_tags += 1; return None; } self.depth += 1; - check_if_allowed_tag(c) + check_if_allowed_tag(&c.to_end()) } Event::End(ref c) => { if is_forbidden_tag(c) { @@ -633,7 +630,7 @@ impl<'a, I: Iterator>> Iterator for SummaryLine<'a, I> { if is_start { Some(Event::Start(Tag::Paragraph)) } else { - Some(Event::End(Tag::Paragraph)) + Some(Event::End(TagEnd::Paragraph)) } } else { Some(event) @@ -679,7 +676,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => { let mut content = Vec::new(); for (event, _) in &mut self.inner { - if let Event::End(Tag::FootnoteDefinition(..)) = event { + if let Event::End(TagEnd::FootnoteDefinition) = event { break; } content.push(event); @@ -696,7 +693,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { for (mut content, id) in v { write!(ret, "
  • ").unwrap(); let mut is_paragraph = false; - if let Some(&Event::End(Tag::Paragraph)) = content.last() { + if let Some(&Event::End(TagEnd::Paragraph)) = content.last() { content.pop(); is_paragraph = true; } @@ -806,7 +803,7 @@ pub(crate) fn find_codes( tests.visit_test(text, block_info, line); prev_offset = offset.start; } - Event::Start(Tag::Heading(level, _, _)) => { + Event::Start(Tag::Heading { level, .. }) => { register_header = Some(level as u32); } Event::Text(ref s) if register_header.is_some() => { @@ -1432,7 +1429,7 @@ impl MarkdownItemInfo<'_> { // Treat inline HTML as plain text. let p = p.map(|event| match event.0 { - Event::Html(text) => (Event::Text(text), event.1), + Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1), _ => event, }); @@ -1442,7 +1439,7 @@ impl MarkdownItemInfo<'_> { let p = Footnotes::new(p); let p = TableWrapper::new(p.map(|(ev, _)| ev)); let p = p.filter(|event| { - !matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) + !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) }); html::push_html(&mut s, p); @@ -1472,7 +1469,7 @@ impl MarkdownSummaryLine<'_> { let mut s = String::new(); let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| { - !matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) + !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) }); html::push_html(&mut s, without_paragraphs); @@ -1544,8 +1541,8 @@ fn markdown_summary_with_limit( _ => {} }, Event::End(tag) => match tag { - Tag::Emphasis | Tag::Strong => buf.close_tag(), - Tag::Paragraph | Tag::Heading(..) => return ControlFlow::Break(()), + TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(), + TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()), _ => {} }, Event::HardBreak | Event::SoftBreak => buf.push(" ")?, @@ -1605,8 +1602,8 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin } Event::HardBreak | Event::SoftBreak => s.push(' '), Event::Start(Tag::CodeBlock(..)) => break, - Event::End(Tag::Paragraph) => break, - Event::End(Tag::Heading(..)) => break, + Event::End(TagEnd::Paragraph) => break, + Event::End(TagEnd::Heading(..)) => break, _ => (), } } @@ -1765,7 +1762,7 @@ pub(crate) fn markdown_links<'md, R>( while let Some((event, span)) = event_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => { let range = match link_type { // Link is pulled from the link itself. LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { @@ -1775,7 +1772,7 @@ pub(crate) fn markdown_links<'md, R>( LinkType::Inline => span_for_offset_backward(span, b'(', b')'), // Link is pulled from elsewhere in the document. LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { - span_for_link(&dest, span) + span_for_link(&dest_url, span) } LinkType::Autolink | LinkType::Email => unreachable!(), }; @@ -1795,7 +1792,7 @@ pub(crate) fn markdown_links<'md, R>( if let Some(link) = preprocess_link(MarkdownLink { kind: link_type, - link: dest.into_string(), + link: dest_url.into_string(), display_text, range, }) { @@ -1810,8 +1807,8 @@ pub(crate) fn markdown_links<'md, R>( } /// Collects additional data of link. -fn collect_link_data<'input, 'callback>( - event_iter: &mut OffsetIter<'input, 'callback>, +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + event_iter: &mut OffsetIter<'input, F>, ) -> Option { let mut display_text: Option = None; let mut append_text = |text: CowStr<'_>| { diff --git a/src/librustdoc/passes/lint/bare_urls.rs b/src/librustdoc/passes/lint/bare_urls.rs index 8f68f6ff4764a..4b2d3092837e1 100644 --- a/src/librustdoc/passes/lint/bare_urls.rs +++ b/src/librustdoc/passes/lint/bare_urls.rs @@ -42,11 +42,11 @@ pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) { match event { Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag), // We don't want to check the text inside code blocks or links. - Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { + Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => { while let Some((event, _)) = p.next() { match event { Event::End(end) - if mem::discriminant(&end) == mem::discriminant(&tag) => + if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) => { break; } diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index a0064a9011254..87dfa5d5389d7 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -4,7 +4,7 @@ use crate::clean::*; use crate::core::DocContext; use crate::html::markdown::main_body_opts; -use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; +use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd}; use rustc_resolve::rustdoc::source_span_for_markdown_range; use std::iter::Peekable; @@ -140,10 +140,10 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { for (event, range) in p { match event { Event::Start(Tag::CodeBlock(_)) => in_code_block = true, - Event::Html(text) if !in_code_block => { + Event::Html(text) | Event::InlineHtml(text) if !in_code_block => { extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) } - Event::End(Tag::CodeBlock(_)) => in_code_block = false, + Event::End(TagEnd::CodeBlock) => in_code_block = false, _ => {} } } diff --git a/src/librustdoc/passes/lint/redundant_explicit_links.rs b/src/librustdoc/passes/lint/redundant_explicit_links.rs index 7ab974046b9c7..b36b41c9f2d2e 100644 --- a/src/librustdoc/passes/lint/redundant_explicit_links.rs +++ b/src/librustdoc/passes/lint/redundant_explicit_links.rs @@ -1,6 +1,8 @@ use std::ops::Range; -use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, OffsetIter, Parser, Tag}; +use pulldown_cmark::{ + BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, OffsetIter, Parser, Tag, +}; use rustc_ast::NodeId; use rustc_errors::SuggestionStyle; use rustc_hir::def::{DefKind, DocLinkResMap, Namespace, Res}; @@ -95,7 +97,7 @@ fn check_redundant_explicit_link<'md>( while let Some((event, link_range)) = offset_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) => { + Event::Start(Tag::Link { link_type, dest_url, .. }) => { let link_data = collect_link_data(&mut offset_iter); if let Some(resolvable_link) = link_data.resolvable_link.as_ref() { @@ -108,7 +110,7 @@ fn check_redundant_explicit_link<'md>( } } - let explicit_link = dest.to_string(); + let explicit_link = dest_url.to_string(); let display_link = link_data.resolvable_link.clone()?; if explicit_link.ends_with(&display_link) || display_link.ends_with(&explicit_link) @@ -122,7 +124,7 @@ fn check_redundant_explicit_link<'md>( doc, resolutions, link_range, - dest.to_string(), + dest_url.to_string(), link_data, if link_type == LinkType::Inline { (b'(', b')') @@ -139,7 +141,7 @@ fn check_redundant_explicit_link<'md>( doc, resolutions, link_range, - &dest, + &dest_url, link_data, ); } @@ -259,7 +261,9 @@ fn find_resolution(resolutions: &DocLinkResMap, path: &str) -> Option) -> LinkData { +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + offset_iter: &mut OffsetIter<'input, F>, +) -> LinkData { let mut resolvable_link = None; let mut resolvable_link_range = None; let mut display_link = String::new(); diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index aa119819aaa26..82fa43f581fde 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -335,6 +335,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "proc-macro2", "psm", "pulldown-cmark", + "pulldown-cmark-escape", "punycode", "quote", "r-efi", From 294c3dda881ae65d528ee0380b7628deaf33ae96 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 29 Jun 2024 11:24:36 -0700 Subject: [PATCH 2/4] rustdoc: add usable lint for pulldown-cmark-0.11 parsing changes --- Cargo.lock | 12 ++ src/librustdoc/Cargo.toml | 1 + src/librustdoc/lint.rs | 9 ++ src/librustdoc/passes/lint.rs | 2 + .../passes/lint/unportable_markdown.rs | 152 ++++++++++++++++++ tests/rustdoc-ui/unportable-markdown.rs | 63 ++++++++ tests/rustdoc-ui/unportable-markdown.stderr | 39 +++++ 7 files changed, 278 insertions(+) create mode 100644 src/librustdoc/passes/lint/unportable_markdown.rs create mode 100644 tests/rustdoc-ui/unportable-markdown.rs create mode 100644 tests/rustdoc-ui/unportable-markdown.stderr diff --git a/Cargo.lock b/Cargo.lock index 3af90a252aea7..96cef9070842e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3121,6 +3121,17 @@ dependencies = [ "cc", ] +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.5.0", + "memchr", + "unicase", +] + [[package]] name = "pulldown-cmark" version = "0.10.3" @@ -4890,6 +4901,7 @@ dependencies = [ "indexmap", "itertools", "minifier", + "pulldown-cmark 0.9.6", "regex", "rustdoc-json-types", "serde", diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 31222f213d800..51fb126cb3407 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -13,6 +13,7 @@ base64 = "0.21.7" itertools = "0.12" indexmap = "2" minifier = "0.3.0" +pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false } regex = "1" rustdoc-json-types = { path = "../rustdoc-json-types" } serde_json = "1.0" diff --git a/src/librustdoc/lint.rs b/src/librustdoc/lint.rs index dd2bb47e5926b..8eaca70eaff48 100644 --- a/src/librustdoc/lint.rs +++ b/src/librustdoc/lint.rs @@ -196,6 +196,14 @@ declare_rustdoc_lint! { "detects redundant explicit links in doc comments" } +declare_rustdoc_lint! { + /// This compatibility lint checks for Markdown syntax that works in the old engine but not + /// the new one. + UNPORTABLE_MARKDOWN, + Warn, + "detects markdown that is interpreted differently in different parser" +} + pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { vec![ BROKEN_INTRA_DOC_LINKS, @@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { MISSING_CRATE_LEVEL_DOCS, UNESCAPED_BACKTICKS, REDUNDANT_EXPLICIT_LINKS, + UNPORTABLE_MARKDOWN, ] }); diff --git a/src/librustdoc/passes/lint.rs b/src/librustdoc/passes/lint.rs index c6d5b7bd346d4..bc804a340bf2c 100644 --- a/src/librustdoc/passes/lint.rs +++ b/src/librustdoc/passes/lint.rs @@ -6,6 +6,7 @@ mod check_code_block_syntax; mod html_tags; mod redundant_explicit_links; mod unescaped_backticks; +mod unportable_markdown; use super::Pass; use crate::clean::*; @@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> { html_tags::visit_item(self.cx, item); unescaped_backticks::visit_item(self.cx, item); redundant_explicit_links::visit_item(self.cx, item); + unportable_markdown::visit_item(self.cx, item); self.visit_item_recur(item) } diff --git a/src/librustdoc/passes/lint/unportable_markdown.rs b/src/librustdoc/passes/lint/unportable_markdown.rs new file mode 100644 index 0000000000000..5f18537763441 --- /dev/null +++ b/src/librustdoc/passes/lint/unportable_markdown.rs @@ -0,0 +1,152 @@ +//! Detects specific markdown syntax that's different between pulldown-cmark +//! 0.9 and 0.11. +//! +//! This is a mitigation for old parser bugs that affected some +//! real crates' docs. The old parser claimed to comply with CommonMark, +//! but it did not. These warnings will eventually be removed, +//! though some of them may become Clippy lints. +//! +//! +//! +//! + +use crate::clean::Item; +use crate::core::DocContext; +use pulldown_cmark as cmarkn; +use pulldown_cmark_old as cmarko; +use rustc_lint_defs::Applicability; +use rustc_resolve::rustdoc::source_span_for_markdown_range; +use std::collections::{BTreeMap, BTreeSet}; + +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { + let tcx = cx.tcx; + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { + // If non-local, no need to check anything. + return; + }; + + let dox = item.doc_value(); + if dox.is_empty() { + return; + } + + // P1: unintended strikethrough was fixed by requiring single-tildes to flank + // the same way underscores do, so nothing is done here + + // P2: block quotes without following space parsed wrong + // + // This is the set of starting points for block quotes with no space after + // the `>`. It is populated by the new parser, and if the old parser fails to + // clear it out, it'll produce a warning. + let mut spaceless_block_quotes = BTreeSet::new(); + + // P3: missing footnote references + // + // This is populated by listening for FootnoteReference from + // the new parser and old parser. + let mut missing_footnote_references = BTreeMap::new(); + let mut found_footnote_references = BTreeSet::new(); + + // populate problem cases from new parser + { + pub fn main_body_opts_new() -> cmarkn::Options { + cmarkn::Options::ENABLE_TABLES + | cmarkn::Options::ENABLE_FOOTNOTES + | cmarkn::Options::ENABLE_STRIKETHROUGH + | cmarkn::Options::ENABLE_TASKLISTS + | cmarkn::Options::ENABLE_SMART_PUNCTUATION + } + let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter(); + while let Some((event, span)) = parser_new.next() { + if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event { + if !dox[span.clone()].starts_with("> ") { + spaceless_block_quotes.insert(span.start); + } + } + if let cmarkn::Event::FootnoteReference(_) = event { + found_footnote_references.insert(span.start + 1); + } + } + } + + // remove cases where they don't actually differ + { + pub fn main_body_opts_old() -> cmarko::Options { + cmarko::Options::ENABLE_TABLES + | cmarko::Options::ENABLE_FOOTNOTES + | cmarko::Options::ENABLE_STRIKETHROUGH + | cmarko::Options::ENABLE_TASKLISTS + | cmarko::Options::ENABLE_SMART_PUNCTUATION + } + let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter(); + while let Some((event, span)) = parser_old.next() { + if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event { + if !dox[span.clone()].starts_with("> ") { + spaceless_block_quotes.remove(&span.start); + } + } + if let cmarko::Event::FootnoteReference(_) = event { + if !found_footnote_references.contains(&(span.start + 1)) { + missing_footnote_references.insert(span.start + 1, span); + } + } + } + } + + for start in spaceless_block_quotes { + let (span, precise) = + source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings) + .map(|span| (span, true)) + .unwrap_or_else(|| (item.attr_span(tcx), false)); + + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| { + lint.primary_message("unportable markdown"); + lint.help(format!("confusing block quote with no space after the `>` marker")); + if precise { + lint.span_suggestion( + span.shrink_to_hi(), + "if the quote is intended, add a space", + " ", + Applicability::MaybeIncorrect, + ); + lint.span_suggestion( + span.shrink_to_lo(), + "if it should not be a quote, escape it", + "\\", + Applicability::MaybeIncorrect, + ); + } + }); + } + for (_caret, span) in missing_footnote_references { + let (ref_span, precise) = + source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings) + .map(|span| (span, true)) + .unwrap_or_else(|| (item.attr_span(tcx), false)); + + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| { + lint.primary_message("unportable markdown"); + if precise { + lint.span_suggestion( + ref_span.shrink_to_lo(), + "if it should not be a footnote, escape it", + "\\", + Applicability::MaybeIncorrect, + ); + } + if dox.as_bytes().get(span.end) == Some(&b'[') { + lint.help("confusing footnote reference and link"); + if precise { + lint.span_suggestion( + ref_span.shrink_to_hi(), + "if the footnote is intended, add a space", + " ", + Applicability::MaybeIncorrect, + ); + } else { + lint.help("there should be a space between the link and the footnote"); + } + } + }); + } +} diff --git a/tests/rustdoc-ui/unportable-markdown.rs b/tests/rustdoc-ui/unportable-markdown.rs new file mode 100644 index 0000000000000..8035e680f3cf4 --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.rs @@ -0,0 +1,63 @@ +// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929 +// +// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes. +// +// This version of the lint is targeted at two especially-common cases where docs got broken. +// Other differences in parsing should not warn. +#![allow(rustdoc::broken_intra_doc_links)] +#![deny(rustdoc::unportable_markdown)] + +/// +/// +/// Test footnote [^foot]. +/// +/// [^foot]: This is nested within the footnote now, but didn't used to be. +/// +/// This is a multi-paragraph footnote. +pub struct GfmFootnotes; + +/// +/// +/// test [^foo][^bar] +//~^ ERROR unportable markdown +/// +/// [^foo]: test +/// [^bar]: test2 +pub struct FootnoteSmashedName; + +/// +/// +/// - _t +/// # test +/// t_ +pub struct NestingCornerCase; + +/// +/// +/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ +pub struct Emphasis1; + +/// +/// +/// | +/// | +pub struct NotEnoughTable; + +/// +/// +/// foo +/// >bar +//~^ ERROR unportable markdown +pub struct BlockQuoteNoSpace; + +/// Negative test. +/// +/// foo +/// > bar +pub struct BlockQuoteSpace; + +/// Negative test. +/// +/// >bar +/// baz +pub struct BlockQuoteNoSpaceStart; diff --git a/tests/rustdoc-ui/unportable-markdown.stderr b/tests/rustdoc-ui/unportable-markdown.stderr new file mode 100644 index 0000000000000..b524aca25aef9 --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.stderr @@ -0,0 +1,39 @@ +error: unportable markdown + --> $DIR/unportable-markdown.rs:21:10 + | +LL | /// test [^foo][^bar] + | ^^^^^^ + | + = help: confusing footnote reference and link +note: the lint level is defined here + --> $DIR/unportable-markdown.rs:8:9 + | +LL | #![deny(rustdoc::unportable_markdown)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +help: if it should not be a footnote, escape it + | +LL | /// test \[^foo][^bar] + | + +help: if the footnote is intended, add a space + | +LL | /// test [^foo] [^bar] + | + + +error: unportable markdown + --> $DIR/unportable-markdown.rs:49:5 + | +LL | /// >bar + | ^ + | + = help: confusing block quote with no space after the `>` marker +help: if the quote is intended, add a space + | +LL | /// > bar + | + +help: if it should not be a quote, escape it + | +LL | /// \>bar + | + + +error: aborting due to 2 previous errors + From cc1b3ee7f1601499ec96dcdfcf6ac1ee40657e6b Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 29 Jun 2024 11:25:12 -0700 Subject: [PATCH 3/4] clippy: update to pulldown-cmark 0.11 --- src/tools/clippy/clippy_lints/src/doc/mod.rs | 32 ++++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/tools/clippy/clippy_lints/src/doc/mod.rs b/src/tools/clippy/clippy_lints/src/doc/mod.rs index 3d875e7ac2d3f..3e210fd153bf5 100644 --- a/src/tools/clippy/clippy_lints/src/doc/mod.rs +++ b/src/tools/clippy/clippy_lints/src/doc/mod.rs @@ -6,10 +6,10 @@ use clippy_utils::ty::is_type_diagnostic_item; use clippy_utils::visitors::Visitable; use clippy_utils::{in_constant, is_entrypoint_fn, is_trait_impl_item, method_chain_args}; use pulldown_cmark::Event::{ - Code, End, FootnoteReference, HardBreak, Html, Rule, SoftBreak, Start, TaskListMarker, Text, + Code, DisplayMath, End, FootnoteReference, HardBreak, Html, InlineHtml, InlineMath, Rule, SoftBreak, Start, TaskListMarker, Text, }; use pulldown_cmark::Tag::{BlockQuote, CodeBlock, FootnoteDefinition, Heading, Item, Link, Paragraph}; -use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options}; +use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options, TagEnd}; use rustc_ast::ast::Attribute; use rustc_data_structures::fx::FxHashSet; use rustc_hir::intravisit::{self, Visitor}; @@ -659,7 +659,7 @@ fn check_doc<'a, Events: Iterator, Range { + Html(tag) | InlineHtml(tag) => { if tag.starts_with(", Range { + Start(BlockQuote(_)) => { blockquote_level += 1; containers.push(Container::Blockquote); }, - End(BlockQuote) => { + End(TagEnd::BlockQuote) => { blockquote_level -= 1; containers.pop(); }, @@ -699,15 +699,15 @@ fn check_doc<'a, Events: Iterator, Range { + End(TagEnd::CodeBlock) => { in_code = false; is_rust = false; ignore = false; }, - Start(Link(_, url, _)) => in_link = Some(url), - End(Link(..)) => in_link = None, - Start(Heading(_, _, _) | Paragraph | Item) => { - if let Start(Heading(_, _, _)) = event { + Start(Link { dest_url, .. }) => in_link = Some(dest_url), + End(TagEnd::Link) => in_link = None, + Start(Heading { .. } | Paragraph | Item) => { + if let Start(Heading { .. }) = event { in_heading = true; } if let Start(Item) = event { @@ -720,11 +720,11 @@ fn check_doc<'a, Events: Iterator, Range { - if let End(Heading(_, _, _)) = event { + End(TagEnd::Heading(_) | TagEnd::Paragraph | TagEnd::Item) => { + if let End(TagEnd::Heading(_)) = event { in_heading = false; } - if let End(Item) = event { + if let End(TagEnd::Item) = event { containers.pop(); } if ticks_unbalanced && let Some(span) = fragments.span(cx, paragraph_range.clone()) { @@ -746,8 +746,8 @@ fn check_doc<'a, Events: Iterator, Range in_footnote_definition = true, - End(FootnoteDefinition(..)) => in_footnote_definition = false, - Start(_tag) | End(_tag) => (), // We don't care about other tags + End(TagEnd::FootnoteDefinition) => in_footnote_definition = false, + Start(_) | End(_) => (), // We don't care about other tags SoftBreak | HardBreak => { if !containers.is_empty() && let Some((next_event, next_range)) = events.peek() @@ -765,7 +765,7 @@ fn check_doc<'a, Events: Iterator, Range (), + TaskListMarker(_) | Code(_) | Rule | InlineMath(..) | DisplayMath(..) => (), FootnoteReference(text) | Text(text) => { paragraph_range.end = range.end; ticks_unbalanced |= text.contains('`') && !in_code; From c8592da16a9e16535f3dfb9359066113f8656de0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sun, 30 Jun 2024 19:04:14 -0700 Subject: [PATCH 4/4] rustc_data_structures: fix wrong markdown syntax This didn't produce working footnote links. The unportable markdown lint warned about it. --- compiler/rustc_data_structures/src/sync.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs index 79ceb28abb518..058a675c40d7b 100644 --- a/compiler/rustc_data_structures/src/sync.rs +++ b/compiler/rustc_data_structures/src/sync.rs @@ -35,11 +35,11 @@ //! | | | | //! | `ParallelIterator` | `Iterator` | `rayon::iter::ParallelIterator` | //! -//! [^1] `MTLock` is similar to `Lock`, but the serial version avoids the cost +//! [^1]: `MTLock` is similar to `Lock`, but the serial version avoids the cost //! of a `RefCell`. This is appropriate when interior mutability is not //! required. //! -//! [^2] `MTRef`, `MTLockRef` are type aliases. +//! [^2]: `MTRef`, `MTLockRef` are type aliases. pub use crate::marker::*; use std::collections::HashMap;