From f5ca02c334b2eaa6817c15d6e2c88ab98d2a2054 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Tue, 1 Feb 2022 20:30:32 +0800 Subject: [PATCH 1/6] proc_macro: Add a workaround for rustdoc --- Cargo.lock | 1 + library/proc_macro/Cargo.toml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e3a9eb349368a..fbc9ca7051f6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2894,6 +2894,7 @@ dependencies = [ name = "proc_macro" version = "0.0.0" dependencies = [ + "core", "std", ] diff --git a/library/proc_macro/Cargo.toml b/library/proc_macro/Cargo.toml index db5e2e4e245ea..e54a50aa15c61 100644 --- a/library/proc_macro/Cargo.toml +++ b/library/proc_macro/Cargo.toml @@ -5,3 +5,7 @@ edition = "2021" [dependencies] std = { path = "../std" } +# Workaround: when documenting this crate rustdoc will try to load crate named +# `core` when resolving doc links. Without this line a different `core` will be +# loaded from sysroot causing duplicate lang items and other similar errors. +core = { path = "../core" } From e2d3a4f63187ed6b092b5ec4a7d8b66f897ca308 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 16 Apr 2022 23:49:37 +0300 Subject: [PATCH 2/6] rustc_metadata: Store a flag telling whether an item may have doc links in its attributes This should be cheap on rustc side, but it's significant optimization for rustdoc that won't need to decode and process attributes unnecessarily --- compiler/rustc_ast/src/attr/mod.rs | 5 +++++ compiler/rustc_ast/src/util/comments.rs | 8 ++++++++ compiler/rustc_metadata/src/rmeta/decoder.rs | 4 ++++ .../src/rmeta/decoder/cstore_impl.rs | 4 ++++ compiler/rustc_metadata/src/rmeta/encoder.rs | 14 +++++++++++--- compiler/rustc_metadata/src/rmeta/mod.rs | 1 + compiler/rustc_metadata/src/rmeta/table.rs | 14 ++++++++++++++ .../intra-doc/email-address-localhost.rs | 2 +- .../intra-doc/email-address-localhost.stderr | 16 ---------------- 9 files changed, 48 insertions(+), 20 deletions(-) delete mode 100644 src/test/rustdoc-ui/intra-doc/email-address-localhost.stderr diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 80caf37d7099d..9a6d12faa605d 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -9,6 +9,7 @@ use crate::token::{self, CommentKind, Token}; use crate::tokenstream::{AttrAnnotatedTokenStream, AttrAnnotatedTokenTree}; use crate::tokenstream::{DelimSpan, Spacing, TokenTree, TreeAndSpacing}; use crate::tokenstream::{LazyTokenStream, TokenStream}; +use crate::util::comments; use rustc_index::bit_set::GrowableBitSet; use rustc_span::source_map::BytePos; @@ -262,6 +263,10 @@ impl Attribute { } } + pub fn may_have_doc_links(&self) -> bool { + self.doc_str().map_or(false, |s| comments::may_have_doc_links(s.as_str())) + } + pub fn get_normal_item(&self) -> &AttrItem { match self.kind { AttrKind::Normal(ref item, _) => item, diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs index 8730aeb0f3bff..b4fff0022e295 100644 --- a/compiler/rustc_ast/src/util/comments.rs +++ b/compiler/rustc_ast/src/util/comments.rs @@ -24,6 +24,14 @@ pub struct Comment { pub pos: BytePos, } +/// A fast conservative estimate on whether the string can contain documentation links. +/// A pair of square brackets `[]` must exist in the string, but we only search for the +/// opening bracket because brackets always go in pairs in practice. +#[inline] +pub fn may_have_doc_links(s: &str) -> bool { + s.contains('[') +} + /// Makes a doc string more presentable to users. /// Used by rustdoc and perhaps other tools, but not by rustc. pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol { diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index c0f2319f003a0..77afa7b4a5cf0 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -1744,6 +1744,10 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { adjustments: generator_data.adjustments, }) } + + fn get_may_have_doc_links(self, index: DefIndex) -> bool { + self.root.tables.may_have_doc_links.get(self, index).is_some() + } } impl CrateMetadata { diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index 6a8f1dec0c5d5..6b1f7d55026ad 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -531,6 +531,10 @@ impl CStore { ) -> impl Iterator + '_ { self.get_crate_data(cnum).get_all_incoherent_impls() } + + pub fn may_have_doc_links_untracked(&self, def_id: DefId) -> bool { + self.get_crate_data(def_id.krate).get_may_have_doc_links(def_id.index) + } } impl CrateStore for CStore { diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 066bcb428f6ac..f485e09913e9d 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -977,6 +977,14 @@ fn should_encode_generics(def_kind: DefKind) -> bool { } impl<'a, 'tcx> EncodeContext<'a, 'tcx> { + fn encode_attrs(&mut self, def_id: DefId) { + let attrs = self.tcx.get_attrs(def_id); + record!(self.tables.attributes[def_id] <- attrs); + if attrs.iter().any(|attr| attr.may_have_doc_links()) { + self.tables.may_have_doc_links.set(def_id.index, ()); + } + } + fn encode_def_ids(&mut self) { if self.is_proc_macro { return; @@ -989,7 +997,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let Some(def_kind) = def_kind else { continue }; self.tables.opt_def_kind.set(def_id.index, def_kind); record!(self.tables.def_span[def_id] <- tcx.def_span(def_id)); - record!(self.tables.attributes[def_id] <- tcx.get_attrs(def_id)); + self.encode_attrs(def_id); record!(self.tables.expn_that_defined[def_id] <- self.tcx.expn_that_defined(def_id)); if should_encode_visibility(def_kind) { record!(self.tables.visibility[def_id] <- self.tcx.visibility(def_id)); @@ -1651,7 +1659,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { self.tables.opt_def_kind.set(LOCAL_CRATE.as_def_id().index, DefKind::Mod); record!(self.tables.def_span[LOCAL_CRATE.as_def_id()] <- tcx.def_span(LOCAL_CRATE.as_def_id())); - record!(self.tables.attributes[LOCAL_CRATE.as_def_id()] <- tcx.get_attrs(LOCAL_CRATE.as_def_id())); + self.encode_attrs(LOCAL_CRATE.as_def_id()); record!(self.tables.visibility[LOCAL_CRATE.as_def_id()] <- tcx.visibility(LOCAL_CRATE.as_def_id())); if let Some(stability) = stability { record!(self.tables.lookup_stability[LOCAL_CRATE.as_def_id()] <- stability); @@ -1692,7 +1700,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let def_id = id.to_def_id(); self.tables.opt_def_kind.set(def_id.index, DefKind::Macro(macro_kind)); record!(self.tables.kind[def_id] <- EntryKind::ProcMacro(macro_kind)); - record!(self.tables.attributes[def_id] <- attrs); + self.encode_attrs(def_id); record!(self.tables.def_keys[def_id] <- def_key); record!(self.tables.def_ident_span[def_id] <- span); record!(self.tables.def_span[def_id] <- span); diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index e1a1589adb322..f1498665ff385 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -360,6 +360,7 @@ define_tables! { def_path_hashes: Table, proc_macro_quoted_spans: Table>, generator_diagnostic_data: Table>>, + may_have_doc_links: Table, } #[derive(Copy, Clone, MetadataEncodable, MetadataDecodable)] diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 7a23cba536a0a..53fc2efe00bb7 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -186,6 +186,20 @@ impl FixedSizeEncoding for Option { } } +impl FixedSizeEncoding for Option<()> { + type ByteArray = [u8; 1]; + + #[inline] + fn from_bytes(b: &[u8; 1]) -> Self { + (b[0] != 0).then(|| ()) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 1]) { + b[0] = self.is_some() as u8 + } +} + // NOTE(eddyb) there could be an impl for `usize`, which would enable a more // generic `Lazy` impl, but in the general case we might not need / want to // fit every `usize` in `u32`. diff --git a/src/test/rustdoc-ui/intra-doc/email-address-localhost.rs b/src/test/rustdoc-ui/intra-doc/email-address-localhost.rs index 9465e8e7ab996..7a5156e81c4c8 100644 --- a/src/test/rustdoc-ui/intra-doc/email-address-localhost.rs +++ b/src/test/rustdoc-ui/intra-doc/email-address-localhost.rs @@ -1,7 +1,7 @@ // normalize-stderr-test: "nightly|beta|1\.[0-9][0-9]\.[0-9]" -> "$$CHANNEL" +// check-pass #![deny(warnings)] //! Email me at . -//~^ ERROR unknown disambiguator `hello` //! This should *not* warn: . diff --git a/src/test/rustdoc-ui/intra-doc/email-address-localhost.stderr b/src/test/rustdoc-ui/intra-doc/email-address-localhost.stderr deleted file mode 100644 index 1b07828fc6e55..0000000000000 --- a/src/test/rustdoc-ui/intra-doc/email-address-localhost.stderr +++ /dev/null @@ -1,16 +0,0 @@ -error: unknown disambiguator `hello` - --> $DIR/email-address-localhost.rs:4:18 - | -LL | //! Email me at . - | ^^^^^ - | -note: the lint level is defined here - --> $DIR/email-address-localhost.rs:2:9 - | -LL | #![deny(warnings)] - | ^^^^^^^^ - = note: `#[deny(rustdoc::broken_intra_doc_links)]` implied by `#[deny(warnings)]` - = note: see https://doc.rust-lang.org/$CHANNEL/rustdoc/linking-to-items-by-name.html#namespaces-and-disambiguators for more info about disambiguators - -error: aborting due to previous error - From 72ed1014281cbb90e7a1d60f7508a5201cb0ae5f Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 16 Apr 2022 23:53:13 +0300 Subject: [PATCH 3/6] rustdoc: Optimize and refactor doc link resolution - Cache doc link resolutions obtained early - Cache markdown links retrieved from doc strings early - Rename and restructure the code in early doc link resolution to be closer to #94857 --- src/librustdoc/clean/types.rs | 9 +- src/librustdoc/core.rs | 5 +- .../passes/collect_intra_doc_links.rs | 32 +++++- .../passes/collect_intra_doc_links/early.rs | 106 ++++++++++++------ 4 files changed, 112 insertions(+), 40 deletions(-) diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index e30bc6e0a97ee..bc9f64e1afcfe 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -1088,6 +1088,13 @@ impl Attributes { crate fn from_ast( attrs: &[ast::Attribute], additional_attrs: Option<(&[ast::Attribute], DefId)>, + ) -> Attributes { + Attributes::from_ast_iter(attrs.iter(), additional_attrs) + } + + crate fn from_ast_iter<'a>( + attrs: impl Iterator, + additional_attrs: Option<(&[ast::Attribute], DefId)>, ) -> Attributes { let mut doc_strings: Vec = vec![]; let clean_attr = |(attr, parent_module): (&ast::Attribute, Option)| { @@ -1115,7 +1122,7 @@ impl Attributes { let other_attrs = additional_attrs .into_iter() .flat_map(|(attrs, id)| attrs.iter().map(move |attr| (attr, Some(id)))) - .chain(attrs.iter().map(|attr| (attr, None))) + .chain(attrs.map(|attr| (attr, None))) .filter_map(clean_attr) .collect(); diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index b9e20c41b681f..adf19aa8e7494 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -4,7 +4,7 @@ use rustc_data_structures::sync::{self, Lrc}; use rustc_errors::emitter::{Emitter, EmitterWriter}; use rustc_errors::json::JsonEmitter; use rustc_feature::UnstableFeatures; -use rustc_hir::def::Res; +use rustc_hir::def::{Namespace, Res}; use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId}; use rustc_hir::intravisit::{self, Visitor}; use rustc_hir::{HirId, Path, TraitCandidate}; @@ -29,11 +29,14 @@ use crate::clean::inline::build_external_trait; use crate::clean::{self, ItemId, TraitWithExtraInfo}; use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions}; use crate::formats::cache::Cache; +use crate::html::markdown::MarkdownLink; use crate::passes::{self, Condition::*}; crate use rustc_session::config::{DebuggingOptions, Input, Options}; crate struct ResolverCaches { + crate markdown_links: Option>>, + crate doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option>>, /// Traits in scope for a given module. /// See `collect_intra_doc_links::traits_implemented_by` for more details. crate traits_in_scope: DefIdMap>, diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index c48f8bd0c7cc5..823a94048eb93 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -3,6 +3,7 @@ //! [RFC 1946]: https://github.com/rust-lang/rfcs/blob/master/text/1946-intra-rustdoc-links.md use pulldown_cmark::LinkType; +use rustc_ast::util::comments::may_have_doc_links; use rustc_data_structures::{fx::FxHashMap, intern::Interned, stable_set::FxHashSet}; use rustc_errors::{Applicability, Diagnostic}; use rustc_hir::def::Namespace::*; @@ -556,7 +557,15 @@ impl<'a, 'tcx> LinkCollector<'a, 'tcx> { // Resolver doesn't know about true, false, and types that aren't paths (e.g. `()`). let result = self .cx - .enter_resolver(|resolver| resolver.resolve_rustdoc_path(path_str, ns, module_id)) + .resolver_caches + .doc_link_resolutions + .get(&(Symbol::intern(path_str), ns, module_id)) + .copied() + .unwrap_or_else(|| { + self.cx.enter_resolver(|resolver| { + resolver.resolve_rustdoc_path(path_str, ns, module_id) + }) + }) .and_then(|res| res.try_into().ok()) .or_else(|| resolve_primitive(path_str, ns)) .or_else(|| self.resolve_macro_rules(path_str, ns)); @@ -1041,16 +1050,29 @@ impl<'a, 'tcx> DocVisitor for LinkCollector<'a, 'tcx> { // Rather than merging all documentation into one, resolve it one attribute at a time // so we know which module it came from. for (parent_module, doc) in item.attrs.collapsed_doc_value_by_module_level() { + if !may_have_doc_links(&doc) { + continue; + } debug!("combined_docs={}", doc); // NOTE: if there are links that start in one crate and end in another, this will not resolve them. // This is a degenerate case and it's not supported by rustdoc. let parent_node = parent_module.or(parent_node); - for md_link in markdown_links(&doc) { + let mut tmp_links = self + .cx + .resolver_caches + .markdown_links + .take() + .expect("`markdown_links` are already borrowed"); + if !tmp_links.contains_key(&doc) { + tmp_links.insert(doc.clone(), markdown_links(&doc)); + } + for md_link in &tmp_links[&doc] { let link = self.resolve_link(&item, &doc, parent_node, md_link); if let Some(link) = link { self.cx.cache.intra_doc_links.entry(item.item_id).or_default().push(link); } } + self.cx.resolver_caches.markdown_links = Some(tmp_links); } if item.is_mod() { @@ -1181,7 +1203,7 @@ impl LinkCollector<'_, '_> { item: &Item, dox: &str, parent_node: Option, - ori_link: MarkdownLink, + ori_link: &MarkdownLink, ) -> Option { trace!("considering link '{}'", ori_link.link); @@ -1320,7 +1342,7 @@ impl LinkCollector<'_, '_> { } Some(ItemLink { - link: ori_link.link, + link: ori_link.link.clone(), link_text, did: res.def_id(self.cx.tcx), fragment, @@ -1343,7 +1365,7 @@ impl LinkCollector<'_, '_> { &diag_info, )?; let id = clean::register_res(self.cx, rustc_hir::def::Res::Def(kind, id)); - Some(ItemLink { link: ori_link.link, link_text, did: id, fragment }) + Some(ItemLink { link: ori_link.link.clone(), link_text, did: id, fragment }) } } } diff --git a/src/librustdoc/passes/collect_intra_doc_links/early.rs b/src/librustdoc/passes/collect_intra_doc_links/early.rs index dffceff045d0b..e8920d5e2886d 100644 --- a/src/librustdoc/passes/collect_intra_doc_links/early.rs +++ b/src/librustdoc/passes/collect_intra_doc_links/early.rs @@ -1,19 +1,20 @@ use crate::clean::Attributes; use crate::core::ResolverCaches; -use crate::html::markdown::markdown_links; +use crate::html::markdown::{markdown_links, MarkdownLink}; use crate::passes::collect_intra_doc_links::preprocess_link; use rustc_ast::visit::{self, AssocCtxt, Visitor}; use rustc_ast::{self as ast, ItemKind}; use rustc_ast_lowering::ResolverAstLowering; -use rustc_hir::def::Namespace::TypeNS; -use rustc_hir::def::{DefKind, Res}; +use rustc_data_structures::fx::FxHashMap; +use rustc_hir::def::Namespace::*; +use rustc_hir::def::{DefKind, Namespace, Res}; use rustc_hir::def_id::{DefId, DefIdMap, DefIdSet, LocalDefId, CRATE_DEF_ID}; use rustc_hir::TraitCandidate; use rustc_middle::ty::{DefIdTree, Visibility}; use rustc_resolve::{ParentScope, Resolver}; use rustc_session::config::Externs; -use rustc_span::SyntaxContext; +use rustc_span::{Symbol, SyntaxContext}; use std::collections::hash_map::Entry; use std::mem; @@ -28,6 +29,8 @@ crate fn early_resolve_intra_doc_links( resolver, current_mod: CRATE_DEF_ID, visited_mods: Default::default(), + markdown_links: Default::default(), + doc_link_resolutions: Default::default(), traits_in_scope: Default::default(), all_traits: Default::default(), all_trait_impls: Default::default(), @@ -36,7 +39,7 @@ crate fn early_resolve_intra_doc_links( // Overridden `visit_item` below doesn't apply to the crate root, // so we have to visit its attributes and reexports separately. - link_resolver.load_links_in_attrs(&krate.attrs); + link_resolver.resolve_doc_links_local(&krate.attrs); link_resolver.process_module_children_or_reexports(CRATE_DEF_ID.to_def_id()); visit::walk_crate(&mut link_resolver, krate); link_resolver.process_extern_impls(); @@ -50,6 +53,8 @@ crate fn early_resolve_intra_doc_links( } ResolverCaches { + markdown_links: Some(link_resolver.markdown_links), + doc_link_resolutions: link_resolver.doc_link_resolutions, traits_in_scope: link_resolver.traits_in_scope, all_traits: Some(link_resolver.all_traits), all_trait_impls: Some(link_resolver.all_trait_impls), @@ -57,10 +62,18 @@ crate fn early_resolve_intra_doc_links( } } +fn doc_attrs<'a>(attrs: impl Iterator) -> Attributes { + let mut attrs = Attributes::from_ast_iter(attrs.filter(|attr| attr.doc_str().is_some()), None); + attrs.unindent_doc_comments(); + attrs +} + struct EarlyDocLinkResolver<'r, 'ra> { resolver: &'r mut Resolver<'ra>, current_mod: LocalDefId, visited_mods: DefIdSet, + markdown_links: FxHashMap>, + doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option>>, traits_in_scope: DefIdMap>, all_traits: Vec, all_trait_impls: Vec, @@ -92,18 +105,11 @@ impl EarlyDocLinkResolver<'_, '_> { } } - fn add_traits_in_parent_scope(&mut self, def_id: DefId) { - if let Some(module_id) = self.resolver.parent(def_id) { - self.add_traits_in_scope(module_id); - } - } - /// Add traits in scope for links in impls collected by the `collect-intra-doc-links` pass. /// That pass filters impls using type-based information, but we don't yet have such /// information here, so we just conservatively calculate traits in scope for *all* modules /// having impls in them. fn process_extern_impls(&mut self) { - // FIXME: Need to resolve doc links on all these impl and trait items below. // Resolving links in already existing crates may trigger loading of new crates. let mut start_cnum = 0; loop { @@ -124,7 +130,7 @@ impl EarlyDocLinkResolver<'_, '_> { // the current crate, and links in their doc comments are not resolved. for &def_id in &all_traits { if self.resolver.cstore().visibility_untracked(def_id) == Visibility::Public { - self.add_traits_in_parent_scope(def_id); + self.resolve_doc_links_extern_impl(def_id, false); } } for &(trait_def_id, impl_def_id, simplified_self_ty) in &all_trait_impls { @@ -135,17 +141,17 @@ impl EarlyDocLinkResolver<'_, '_> { == Visibility::Public }) { - self.add_traits_in_parent_scope(impl_def_id); + self.resolve_doc_links_extern_impl(impl_def_id, false); } } for (ty_def_id, impl_def_id) in all_inherent_impls { if self.resolver.cstore().visibility_untracked(ty_def_id) == Visibility::Public { - self.add_traits_in_parent_scope(impl_def_id); + self.resolve_doc_links_extern_impl(impl_def_id, true); } } - for def_id in all_incoherent_impls { - self.add_traits_in_parent_scope(def_id); + for impl_def_id in all_incoherent_impls { + self.resolve_doc_links_extern_impl(impl_def_id, true); } self.all_traits.extend(all_traits); @@ -161,16 +167,52 @@ impl EarlyDocLinkResolver<'_, '_> { } } - fn load_links_in_attrs(&mut self, attrs: &[ast::Attribute]) { + fn resolve_doc_links_extern_impl(&mut self, def_id: DefId, _is_inherent: bool) { + // FIXME: Resolve links in associated items in addition to traits themselves, + // `force` is used to provide traits in scope for the associated items. + self.resolve_doc_links_extern_outer(def_id, def_id, true); + } + + fn resolve_doc_links_extern_outer(&mut self, def_id: DefId, scope_id: DefId, force: bool) { + if !force && !self.resolver.cstore().may_have_doc_links_untracked(def_id) { + return; + } + // FIXME: actually resolve links, not just add traits in scope. + if let Some(parent_id) = self.resolver.parent(scope_id) { + self.add_traits_in_scope(parent_id); + } + } + + fn resolve_doc_links_extern_inner(&mut self, def_id: DefId) { + if !self.resolver.cstore().may_have_doc_links_untracked(def_id) { + return; + } + // FIXME: actually resolve links, not just add traits in scope. + self.add_traits_in_scope(def_id); + } + + fn resolve_doc_links_local(&mut self, attrs: &[ast::Attribute]) { + if !attrs.iter().any(|attr| attr.may_have_doc_links()) { + return; + } let module_id = self.current_mod.to_def_id(); + self.resolve_doc_links(doc_attrs(attrs.iter()), module_id); + } + + fn resolve_doc_links(&mut self, attrs: Attributes, module_id: DefId) { let mut need_traits_in_scope = false; - for (doc_module, doc) in - Attributes::from_ast(attrs, None).collapsed_doc_value_by_module_level() - { + for (doc_module, doc) in attrs.collapsed_doc_value_by_module_level() { assert_eq!(doc_module, None); - for link in markdown_links(&doc.as_str()) { + for link in self.markdown_links.entry(doc).or_insert_with_key(|doc| markdown_links(doc)) + { if let Some(Ok(pinfo)) = preprocess_link(&link) { - self.resolver.resolve_rustdoc_path(&pinfo.path_str, TypeNS, module_id); + // FIXME: Resolve the path in all namespaces and resolve its prefixes too. + let ns = TypeNS; + self.doc_link_resolutions + .entry((Symbol::intern(&pinfo.path_str), ns, module_id)) + .or_insert_with_key(|(path, ns, module_id)| { + self.resolver.resolve_rustdoc_path(path.as_str(), *ns, *module_id) + }); need_traits_in_scope = true; } } @@ -197,15 +239,13 @@ impl EarlyDocLinkResolver<'_, '_> { && module_id.is_local() { if let Some(def_id) = child.res.opt_def_id() && !def_id.is_local() { - // FIXME: Need to resolve doc links on all these extern items - // reached through reexports. let scope_id = match child.res { Res::Def(DefKind::Variant, ..) => self.resolver.parent(def_id).unwrap(), _ => def_id, }; - self.add_traits_in_parent_scope(scope_id); // Outer attribute scope + self.resolve_doc_links_extern_outer(def_id, scope_id, false); // Outer attribute scope if let Res::Def(DefKind::Mod, ..) = child.res { - self.add_traits_in_scope(def_id); // Inner attribute scope + self.resolve_doc_links_extern_inner(def_id); // Inner attribute scope } // Traits are processed in `add_extern_traits_in_scope`. if let Res::Def(DefKind::Mod | DefKind::Enum, ..) = child.res { @@ -219,10 +259,10 @@ impl EarlyDocLinkResolver<'_, '_> { impl Visitor<'_> for EarlyDocLinkResolver<'_, '_> { fn visit_item(&mut self, item: &ast::Item) { - self.load_links_in_attrs(&item.attrs); // Outer attribute scope + self.resolve_doc_links_local(&item.attrs); // Outer attribute scope if let ItemKind::Mod(..) = item.kind { let old_mod = mem::replace(&mut self.current_mod, self.resolver.local_def_id(item.id)); - self.load_links_in_attrs(&item.attrs); // Inner attribute scope + self.resolve_doc_links_local(&item.attrs); // Inner attribute scope self.process_module_children_or_reexports(self.current_mod.to_def_id()); visit::walk_item(self, item); self.current_mod = old_mod; @@ -241,22 +281,22 @@ impl Visitor<'_> for EarlyDocLinkResolver<'_, '_> { } fn visit_assoc_item(&mut self, item: &ast::AssocItem, ctxt: AssocCtxt) { - self.load_links_in_attrs(&item.attrs); + self.resolve_doc_links_local(&item.attrs); visit::walk_assoc_item(self, item, ctxt) } fn visit_foreign_item(&mut self, item: &ast::ForeignItem) { - self.load_links_in_attrs(&item.attrs); + self.resolve_doc_links_local(&item.attrs); visit::walk_foreign_item(self, item) } fn visit_variant(&mut self, v: &ast::Variant) { - self.load_links_in_attrs(&v.attrs); + self.resolve_doc_links_local(&v.attrs); visit::walk_variant(self, v) } fn visit_field_def(&mut self, field: &ast::FieldDef) { - self.load_links_in_attrs(&field.attrs); + self.resolve_doc_links_local(&field.attrs); visit::walk_field_def(self, field) } From de287df86245bdeed485373d1a3b29ec028d33f5 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 16 Apr 2022 23:59:21 +0300 Subject: [PATCH 4/6] rustdoc: Cache preprocessed markdown links --- src/librustdoc/core.rs | 4 +- src/librustdoc/html/markdown.rs | 10 ++- .../passes/collect_intra_doc_links.rs | 84 +++++++++++-------- .../passes/collect_intra_doc_links/early.rs | 15 ++-- 4 files changed, 68 insertions(+), 45 deletions(-) diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index adf19aa8e7494..1db6064551cae 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -29,13 +29,13 @@ use crate::clean::inline::build_external_trait; use crate::clean::{self, ItemId, TraitWithExtraInfo}; use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions}; use crate::formats::cache::Cache; -use crate::html::markdown::MarkdownLink; +use crate::passes::collect_intra_doc_links::PreprocessedMarkdownLink; use crate::passes::{self, Condition::*}; crate use rustc_session::config::{DebuggingOptions, Input, Options}; crate struct ResolverCaches { - crate markdown_links: Option>>, + crate markdown_links: Option>>, crate doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option>>, /// Traits in scope for a given module. /// See `collect_intra_doc_links::traits_implemented_by` for more details. diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 1ebb41b5933d0..eafe6f17d44bf 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -1255,7 +1255,7 @@ crate struct MarkdownLink { pub range: Range, } -crate fn markdown_links(md: &str) -> Vec { +crate fn markdown_links(md: &str, filter_map: impl Fn(MarkdownLink) -> Option) -> Vec { if md.is_empty() { return vec![]; } @@ -1295,11 +1295,12 @@ crate fn markdown_links(md: &str) -> Vec { let mut push = |link: BrokenLink<'_>| { let span = span_for_link(&link.reference, link.span); - links.borrow_mut().push(MarkdownLink { + filter_map(MarkdownLink { kind: LinkType::ShortcutUnknown, link: link.reference.to_string(), range: span, - }); + }) + .map(|link| links.borrow_mut().push(link)); None }; let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut push)) @@ -1314,7 +1315,8 @@ crate fn markdown_links(md: &str) -> Vec { if let Event::Start(Tag::Link(kind, dest, _)) = ev.0 { debug!("found link: {dest}"); let span = span_for_link(&dest, ev.1); - links.borrow_mut().push(MarkdownLink { kind, link: dest.into_string(), range: span }); + filter_map(MarkdownLink { kind, link: dest.into_string(), range: span }) + .map(|link| links.borrow_mut().push(link)); } } diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index 823a94048eb93..c9fc14d5f71a4 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -160,7 +160,7 @@ impl TryFrom for Res { } /// A link failed to resolve. -#[derive(Debug)] +#[derive(Clone, Debug)] enum ResolutionFailure<'a> { /// This resolved, but with the wrong namespace. WrongNamespace { @@ -200,7 +200,7 @@ enum ResolutionFailure<'a> { Dummy, } -#[derive(Debug)] +#[derive(Clone, Debug)] enum MalformedGenerics { /// This link has unbalanced angle brackets. /// @@ -253,6 +253,7 @@ impl ResolutionFailure<'_> { } } +#[derive(Clone, Copy)] enum AnchorFailure { /// User error: `[std#x#y]` is not valid MultipleAnchors, @@ -1064,7 +1065,7 @@ impl<'a, 'tcx> DocVisitor for LinkCollector<'a, 'tcx> { .take() .expect("`markdown_links` are already borrowed"); if !tmp_links.contains_key(&doc) { - tmp_links.insert(doc.clone(), markdown_links(&doc)); + tmp_links.insert(doc.clone(), preprocessed_markdown_links(&doc)); } for md_link in &tmp_links[&doc] { let link = self.resolve_link(&item, &doc, parent_node, md_link); @@ -1088,18 +1089,19 @@ impl<'a, 'tcx> DocVisitor for LinkCollector<'a, 'tcx> { } } -enum PreprocessingError<'a> { +enum PreprocessingError { Anchor(AnchorFailure), Disambiguator(Range, String), - Resolution(ResolutionFailure<'a>, String, Option), + Resolution(ResolutionFailure<'static>, String, Option), } -impl From for PreprocessingError<'_> { +impl From for PreprocessingError { fn from(err: AnchorFailure) -> Self { Self::Anchor(err) } } +#[derive(Clone)] struct PreprocessingInfo { path_str: String, disambiguator: Option, @@ -1107,15 +1109,18 @@ struct PreprocessingInfo { link_text: String, } +// Not a typedef to avoid leaking several private structures from this module. +crate struct PreprocessedMarkdownLink(Result, MarkdownLink); + /// Returns: /// - `None` if the link should be ignored. /// - `Some(Err)` if the link should emit an error /// - `Some(Ok)` if the link is valid /// /// `link_buffer` is needed for lifetime reasons; it will always be overwritten and the contents ignored. -fn preprocess_link<'a>( - ori_link: &'a MarkdownLink, -) -> Option>> { +fn preprocess_link( + ori_link: &MarkdownLink, +) -> Option> { // [] is mostly likely not supposed to be a link if ori_link.link.is_empty() { return None; @@ -1194,6 +1199,12 @@ fn preprocess_link<'a>( })) } +fn preprocessed_markdown_links(s: &str) -> Vec { + markdown_links(s, |link| { + preprocess_link(&link).map(|pp_link| PreprocessedMarkdownLink(pp_link, link)) + }) +} + impl LinkCollector<'_, '_> { /// This is the entry point for resolving an intra-doc link. /// @@ -1203,8 +1214,9 @@ impl LinkCollector<'_, '_> { item: &Item, dox: &str, parent_node: Option, - ori_link: &MarkdownLink, + link: &PreprocessedMarkdownLink, ) -> Option { + let PreprocessedMarkdownLink(pp_link, ori_link) = link; trace!("considering link '{}'", ori_link.link); let diag_info = DiagnosticInfo { @@ -1214,28 +1226,29 @@ impl LinkCollector<'_, '_> { link_range: ori_link.range.clone(), }; - let PreprocessingInfo { ref path_str, disambiguator, extra_fragment, link_text } = - match preprocess_link(&ori_link)? { - Ok(x) => x, - Err(err) => { - match err { - PreprocessingError::Anchor(err) => anchor_failure(self.cx, diag_info, err), - PreprocessingError::Disambiguator(range, msg) => { - disambiguator_error(self.cx, diag_info, range, &msg) - } - PreprocessingError::Resolution(err, path_str, disambiguator) => { - resolution_failure( - self, - diag_info, - &path_str, - disambiguator, - smallvec![err], - ); - } + let PreprocessingInfo { path_str, disambiguator, extra_fragment, link_text } = match pp_link + { + Ok(x) => x, + Err(err) => { + match err { + PreprocessingError::Anchor(err) => anchor_failure(self.cx, diag_info, *err), + PreprocessingError::Disambiguator(range, msg) => { + disambiguator_error(self.cx, diag_info, range.clone(), msg) + } + PreprocessingError::Resolution(err, path_str, disambiguator) => { + resolution_failure( + self, + diag_info, + path_str, + *disambiguator, + smallvec![err.clone()], + ); } - return None; } - }; + return None; + } + }; + let disambiguator = *disambiguator; let inner_docs = item.inner_docs(self.cx.tcx); @@ -1272,7 +1285,7 @@ impl LinkCollector<'_, '_> { module_id, dis: disambiguator, path_str: path_str.to_owned(), - extra_fragment, + extra_fragment: extra_fragment.clone(), }, diag_info.clone(), // this struct should really be Copy, but Range is not :( matches!(ori_link.kind, LinkType::Reference | LinkType::Shortcut), @@ -1343,7 +1356,7 @@ impl LinkCollector<'_, '_> { Some(ItemLink { link: ori_link.link.clone(), - link_text, + link_text: link_text.clone(), did: res.def_id(self.cx.tcx), fragment, }) @@ -1365,7 +1378,12 @@ impl LinkCollector<'_, '_> { &diag_info, )?; let id = clean::register_res(self.cx, rustc_hir::def::Res::Def(kind, id)); - Some(ItemLink { link: ori_link.link.clone(), link_text, did: id, fragment }) + Some(ItemLink { + link: ori_link.link.clone(), + link_text: link_text.clone(), + did: id, + fragment, + }) } } } diff --git a/src/librustdoc/passes/collect_intra_doc_links/early.rs b/src/librustdoc/passes/collect_intra_doc_links/early.rs index e8920d5e2886d..a285c300b75c3 100644 --- a/src/librustdoc/passes/collect_intra_doc_links/early.rs +++ b/src/librustdoc/passes/collect_intra_doc_links/early.rs @@ -1,7 +1,7 @@ use crate::clean::Attributes; use crate::core::ResolverCaches; -use crate::html::markdown::{markdown_links, MarkdownLink}; -use crate::passes::collect_intra_doc_links::preprocess_link; +use crate::passes::collect_intra_doc_links::preprocessed_markdown_links; +use crate::passes::collect_intra_doc_links::PreprocessedMarkdownLink; use rustc_ast::visit::{self, AssocCtxt, Visitor}; use rustc_ast::{self as ast, ItemKind}; @@ -72,7 +72,7 @@ struct EarlyDocLinkResolver<'r, 'ra> { resolver: &'r mut Resolver<'ra>, current_mod: LocalDefId, visited_mods: DefIdSet, - markdown_links: FxHashMap>, + markdown_links: FxHashMap>, doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option>>, traits_in_scope: DefIdMap>, all_traits: Vec, @@ -203,9 +203,12 @@ impl EarlyDocLinkResolver<'_, '_> { let mut need_traits_in_scope = false; for (doc_module, doc) in attrs.collapsed_doc_value_by_module_level() { assert_eq!(doc_module, None); - for link in self.markdown_links.entry(doc).or_insert_with_key(|doc| markdown_links(doc)) - { - if let Some(Ok(pinfo)) = preprocess_link(&link) { + let links = self + .markdown_links + .entry(doc) + .or_insert_with_key(|doc| preprocessed_markdown_links(doc)); + for PreprocessedMarkdownLink(pp_link, _) in links { + if let Ok(pinfo) = pp_link { // FIXME: Resolve the path in all namespaces and resolve its prefixes too. let ns = TypeNS; self.doc_link_resolutions From 5cce8cb4ec4159851d8f5089eefc9979d480f0ec Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 17 Apr 2022 02:01:46 +0300 Subject: [PATCH 5/6] rustdoc: Sligthly optimize `Attributes` construction and processing before doc link resolution --- src/librustdoc/clean/types.rs | 69 ++++++++----------- .../passes/collect_intra_doc_links.rs | 2 +- .../passes/collect_intra_doc_links/early.rs | 4 +- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index bc9f64e1afcfe..95ac3ab622a14 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -1089,42 +1089,35 @@ impl Attributes { attrs: &[ast::Attribute], additional_attrs: Option<(&[ast::Attribute], DefId)>, ) -> Attributes { - Attributes::from_ast_iter(attrs.iter(), additional_attrs) + // Additional documentation should be shown before the original documentation. + let attrs1 = additional_attrs + .into_iter() + .flat_map(|(attrs, def_id)| attrs.iter().map(move |attr| (attr, Some(def_id)))); + let attrs2 = attrs.iter().map(|attr| (attr, None)); + Attributes::from_ast_iter(attrs1.chain(attrs2), false) } crate fn from_ast_iter<'a>( - attrs: impl Iterator, - additional_attrs: Option<(&[ast::Attribute], DefId)>, + attrs: impl Iterator)>, + doc_only: bool, ) -> Attributes { - let mut doc_strings: Vec = vec![]; - let clean_attr = |(attr, parent_module): (&ast::Attribute, Option)| { - if let Some((value, kind)) = attr.doc_str_and_comment_kind() { - trace!("got doc_str={:?}", value); - let value = beautify_doc_string(value, kind); + let mut doc_strings = Vec::new(); + let mut other_attrs = Vec::new(); + for (attr, parent_module) in attrs { + if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() { + trace!("got doc_str={doc_str:?}"); + let doc = beautify_doc_string(doc_str, comment_kind); let kind = if attr.is_doc_comment() { DocFragmentKind::SugaredDoc } else { DocFragmentKind::RawDoc }; - - let frag = - DocFragment { span: attr.span, doc: value, kind, parent_module, indent: 0 }; - - doc_strings.push(frag); - - None - } else { - Some(attr.clone()) + let fragment = DocFragment { span: attr.span, doc, kind, parent_module, indent: 0 }; + doc_strings.push(fragment); + } else if !doc_only { + other_attrs.push(attr.clone()); } - }; - - // Additional documentation should be shown before the original documentation - let other_attrs = additional_attrs - .into_iter() - .flat_map(|(attrs, id)| attrs.iter().map(move |attr| (attr, Some(id)))) - .chain(attrs.map(|attr| (attr, None))) - .filter_map(clean_attr) - .collect(); + } Attributes { doc_strings, other_attrs } } @@ -1145,23 +1138,17 @@ impl Attributes { } /// Return the doc-comments on this item, grouped by the module they came from. - /// /// The module can be different if this is a re-export with added documentation. - crate fn collapsed_doc_value_by_module_level(&self) -> FxHashMap, String> { - let mut ret = FxHashMap::default(); - if self.doc_strings.len() == 0 { - return ret; - } - let last_index = self.doc_strings.len() - 1; - - for (i, new_frag) in self.doc_strings.iter().enumerate() { - let out = ret.entry(new_frag.parent_module).or_default(); - add_doc_fragment(out, new_frag); - if i == last_index { - out.pop(); - } + /// + /// The last newline is not trimmed so the produced strings are reusable between + /// early and late doc link resolution regardless of their position. + crate fn prepare_to_doc_link_resolution(&self) -> FxHashMap, String> { + let mut res = FxHashMap::default(); + for fragment in &self.doc_strings { + let out_str = res.entry(fragment.parent_module).or_default(); + add_doc_fragment(out_str, fragment); } - ret + res } /// Finds all `doc` attributes as NameValues and returns their corresponding values, joined diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index c9fc14d5f71a4..42e87f3f9610b 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -1050,7 +1050,7 @@ impl<'a, 'tcx> DocVisitor for LinkCollector<'a, 'tcx> { // In the presence of re-exports, this is not the same as the module of the item. // Rather than merging all documentation into one, resolve it one attribute at a time // so we know which module it came from. - for (parent_module, doc) in item.attrs.collapsed_doc_value_by_module_level() { + for (parent_module, doc) in item.attrs.prepare_to_doc_link_resolution() { if !may_have_doc_links(&doc) { continue; } diff --git a/src/librustdoc/passes/collect_intra_doc_links/early.rs b/src/librustdoc/passes/collect_intra_doc_links/early.rs index a285c300b75c3..e2359da870edc 100644 --- a/src/librustdoc/passes/collect_intra_doc_links/early.rs +++ b/src/librustdoc/passes/collect_intra_doc_links/early.rs @@ -63,7 +63,7 @@ crate fn early_resolve_intra_doc_links( } fn doc_attrs<'a>(attrs: impl Iterator) -> Attributes { - let mut attrs = Attributes::from_ast_iter(attrs.filter(|attr| attr.doc_str().is_some()), None); + let mut attrs = Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true); attrs.unindent_doc_comments(); attrs } @@ -201,7 +201,7 @@ impl EarlyDocLinkResolver<'_, '_> { fn resolve_doc_links(&mut self, attrs: Attributes, module_id: DefId) { let mut need_traits_in_scope = false; - for (doc_module, doc) in attrs.collapsed_doc_value_by_module_level() { + for (doc_module, doc) in attrs.prepare_to_doc_link_resolution() { assert_eq!(doc_module, None); let links = self .markdown_links From ca5c752a7a04f9a5149df1399106f103c6e5ba17 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 19 Apr 2022 16:08:22 +0200 Subject: [PATCH 6/6] Add regression test for #96079 --- src/test/rustdoc/early-unindent.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/test/rustdoc/early-unindent.rs diff --git a/src/test/rustdoc/early-unindent.rs b/src/test/rustdoc/early-unindent.rs new file mode 100644 index 0000000000000..791a452c9571b --- /dev/null +++ b/src/test/rustdoc/early-unindent.rs @@ -0,0 +1,26 @@ +// This is a regression for https://github.com/rust-lang/rust/issues/96079. + +#![crate_name = "foo"] + +pub mod app { + pub struct S; + + impl S { + // @has 'foo/app/struct.S.html' + // @has - '//a[@href="../enums/enum.Foo.html#method.by_name"]' 'Foo::by_name' + /** + Doc comment hello! [`Foo::by_name`](`crate::enums::Foo::by_name`). + */ + pub fn whatever(&self) {} + } +} + +pub mod enums { + pub enum Foo { + Bar, + } + + impl Foo { + pub fn by_name(&self) {} + } +}