From d4b3a1d43c7fe971fb9cc92a00d0a4d5cb64fa5a Mon Sep 17 00:00:00 2001
From: Eric Huss
Date: Tue, 12 Jan 2021 09:17:49 -0800
Subject: [PATCH 1/3] Fix panic with `cargo doc` and new resolver and proc-macros.

---
 src/cargo/core/compiler/unit_dependencies.rs | 16 ++++---
 tests/testsuite/features2.rs                 | 47 ++++++++++++++++++++
 2 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/cargo/core/compiler/unit_dependencies.rs b/src/cargo/core/compiler/unit_dependencies.rs
index e58d2dfc053..1361ed49e10 100644
--- a/src/cargo/core/compiler/unit_dependencies.rs
+++ b/src/cargo/core/compiler/unit_dependencies.rs
@@ -225,7 +225,7 @@ fn compute_deps(
         return compute_deps_custom_build(unit, unit_for, state);
     } else if unit.mode.is_doc() {
         // Note: this does not include doc test.
-        return compute_deps_doc(unit, state);
+        return compute_deps_doc(unit, state, unit_for);
     }
 
     let id = unit.pkg.package_id();
@@ -395,9 +395,13 @@ fn compute_deps_custom_build(
 }
 
 /// Returns the dependencies necessary to document a package.
-fn compute_deps_doc(unit: &Unit, state: &mut State<'_, '_>) -> CargoResult<Vec<UnitDep>> {
+fn compute_deps_doc(
+    unit: &Unit,
+    state: &mut State<'_, '_>,
+    unit_for: UnitFor,
+) -> CargoResult<Vec<UnitDep>> {
     let deps = state
-        .deps(unit, UnitFor::new_normal())
+        .deps(unit, unit_for)
         .into_iter()
         .filter(|&(_id, deps)| deps.iter().any(|dep| dep.kind() == DepKind::Normal));
 
@@ -414,7 +418,7 @@ fn compute_deps_doc(unit: &Unit, state: &mut State<'_, '_>) -> CargoResult<Vec<UnitDep>> {

From: Eric Huss
Date: Thu, 14 Jan 2021 13:12:24 -0800
Subject: [PATCH 2/3] Remove some doc collisions.

There are some cases where `cargo doc` will try to document two things
with the same crate_name. This attempts to automatically remove some of
those duplicates based on the following rules:

- Prefers dependencies for the target over dependencies for the host
  (such as proc-macros).
- Prefers the "newest" version if it comes from the same source.

There are still plenty of situations where there can be collisions, but
I'm uncertain about the best way to handle those.
---
 src/cargo/ops/cargo_compile.rs | 114 +++++++++++++-
 tests/testsuite/collisions.rs  | 271 +++++++++++++++++++++++++++++++++
 2 files changed, 384 insertions(+), 1 deletion(-)

diff --git a/src/cargo/ops/cargo_compile.rs b/src/cargo/ops/cargo_compile.rs
index 8a53c5b9b56..3ff7eb46522 100644
--- a/src/cargo/ops/cargo_compile.rs
+++ b/src/cargo/ops/cargo_compile.rs
@@ -36,10 +36,11 @@ use crate::core::profiles::{Profiles, UnitFor};
 use crate::core::resolver::features::{self, FeaturesFor, RequestedFeatures};
 use crate::core::resolver::{HasDevUnits, Resolve, ResolveOpts};
 use crate::core::{FeatureValue, Package, PackageSet, Shell, Summary, Target};
-use crate::core::{PackageId, PackageIdSpec, TargetKind, Workspace};
+use crate::core::{PackageId, PackageIdSpec, SourceId, TargetKind, Workspace};
 use crate::ops;
 use crate::ops::resolve::WorkspaceResolve;
 use crate::util::config::Config;
+use crate::util::interning::InternedString;
 use crate::util::restricted_names::is_glob_pattern;
 use crate::util::{closest_msg, profile, CargoResult, StableHasher};
 
@@ -503,6 +504,12 @@ pub fn create_bcx<'a, 'cfg>(
         interner,
     )?;
 
+    // TODO: In theory, Cargo should also dedupe the roots, but I'm uncertain
+    // what heuristics to use in that case.
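+    // (`cargo doc --no-deps` runs with `CompileMode::Doc { deps: false }`,
+    // so it skips this deduplication entirely.)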
+    if build_config.mode == (CompileMode::Doc { deps: true }) {
+        remove_duplicate_doc(build_config, &mut unit_graph);
+    }
+
     if build_config
         .requested_kinds
         .iter()
@@ -1455,3 +1462,108 @@ fn opt_patterns_and_names(
     }
     Ok((opt_patterns, opt_names))
 }
+
+/// Removes duplicate CompileMode::Doc units that would cause problems with
+/// filename collisions.
+///
+/// Rustdoc only separates units by crate name in the file directory
+/// structure. If any two units with the same crate name exist, this would
+/// cause a filename collision, causing different rustdoc invocations to stomp
+/// on one another's files.
+///
+/// Unfortunately this does not remove all duplicates, as some of them are
+/// either user error, or difficult to remove. Cases that I can think of:
+///
+/// - Same target name in different packages. See the `collision_doc` test.
+/// - Different sources. See the `collision_doc_sources` test.
+///
+/// Ideally this would not be necessary.
+fn remove_duplicate_doc(build_config: &BuildConfig, unit_graph: &mut UnitGraph) {
+    // NOTE: There is some risk that this can introduce problems because it
+    // may create orphans in the unit graph (parts of the tree get detached
+    // from the roots). I currently can't think of any ways this will cause a
+    // problem because all other parts of Cargo traverse the graph starting
+    // from the roots. Perhaps this should scan for detached units and remove
+    // them too?
+    //
+    // First, create a mapping of crate_name -> Unit so we can see where the
+    // duplicates are.
+    let mut all_docs: HashMap<String, Vec<Unit>> = HashMap::new();
+    for unit in unit_graph.keys() {
+        if unit.mode.is_doc() {
+            all_docs
+                .entry(unit.target.crate_name())
+                .or_default()
+                .push(unit.clone());
+        }
+    }
+    let mut remove = |units: Vec<Unit>, reason: &str| {
+        for unit in &units {
+            log::debug!(
+                "removing duplicate doc due to {} for package {} target `{}`",
+                reason,
+                unit.pkg,
+                unit.target.name()
+            );
+            unit_graph.remove(unit);
+        }
+        for unit_deps in unit_graph.values_mut() {
+            unit_deps.retain(|unit_dep| !units.iter().any(|unit| *unit == unit_dep.unit));
+        }
+    };
+    // Iterate over the duplicates and try to remove them from unit_graph.
+    for (_crate_name, mut units) in all_docs {
+        if units.len() == 1 {
+            continue;
+        }
+        // Prefer target over host if --target was not specified.
+        if build_config
+            .requested_kinds
+            .iter()
+            .all(CompileKind::is_host)
+        {
+            let (to_remove, remaining_units): (Vec<Unit>, Vec<Unit>) =
+                units.into_iter().partition(|unit| unit.kind.is_host());
+            // Note these duplicates may not be real duplicates, since they
+            // might get merged in rebuild_unit_graph_shared. Either way, it
+            // shouldn't hurt to remove them early (although the report in the
+            // log might be confusing).
+            remove(to_remove, "host/target merger");
+            units = remaining_units;
+            if units.len() == 1 {
+                continue;
+            }
+        }
+        // Prefer newer versions over older.
+        let mut source_map: HashMap<(InternedString, SourceId, CompileKind), Vec<Unit>> =
+            HashMap::new();
+        for unit in units {
+            let pkg_id = unit.pkg.package_id();
+            // Note, this does not detect duplicates from different sources.
+            source_map
+                .entry((pkg_id.name(), pkg_id.source_id(), unit.kind))
+                .or_default()
+                .push(unit);
+        }
+        let mut remaining_units = Vec::new();
+        for (_key, mut units) in source_map {
+            if units.len() > 1 {
+                units.sort_by(|a, b| a.pkg.version().partial_cmp(b.pkg.version()).unwrap());
+                // Remove any entries with version < newest.
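+                // (`Version` has a total order, so `partial_cmp` above never
+                // returns `None` and the `unwrap` cannot panic.)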
+                let newest_version = units.last().unwrap().pkg.version().clone();
+                let (to_remove, keep_units): (Vec<Unit>, Vec<Unit>) = units
+                    .into_iter()
+                    .partition(|unit| unit.pkg.version() < &newest_version);
+                remove(to_remove, "older version");
+                remaining_units.extend(keep_units);
+            } else {
+                remaining_units.extend(units);
+            }
+        }
+        if remaining_units.len() == 1 {
+            continue;
+        }
+        // Are there other heuristics to remove duplicates that would make
+        // sense? Maybe prefer path sources over all others?
+    }
+}
diff --git a/tests/testsuite/collisions.rs b/tests/testsuite/collisions.rs
index 5b94aae28ec..4315498169a 100644
--- a/tests/testsuite/collisions.rs
+++ b/tests/testsuite/collisions.rs
@@ -5,6 +5,7 @@
 use cargo_test_support::basic_manifest;
 use cargo_test_support::project;
+use cargo_test_support::registry::Package;
 use std::env;
 
@@ -160,3 +161,273 @@ the same path; see .
         )
         .run();
 }
+
+#[cargo_test]
+fn collision_doc_multiple_versions() {
+    // Multiple versions of the same package.
+    Package::new("old-dep", "1.0.0").publish();
+    Package::new("bar", "1.0.0").dep("old-dep", "1.0").publish();
+    // Note that this removes "old-dep". Just checking what happens when there
+    // are orphans.
+    Package::new("bar", "2.0.0").publish();
+    let p = project()
+        .file(
+            "Cargo.toml",
+            r#"
+            [package]
+            name = "foo"
+            version = "0.1.0"
+
+            [dependencies]
+            bar = "1.0"
+            bar2 = { package="bar", version="2.0" }
+            "#,
+        )
+        .file("src/lib.rs", "")
+        .build();
+
+    // Should only document bar 2.0, should not document old-dep.
+    p.cargo("doc")
+        .with_stderr_unordered(
+            "\
+[UPDATING] [..]
+[DOWNLOADING] crates ...
+[DOWNLOADED] bar v2.0.0 [..]
+[DOWNLOADED] bar v1.0.0 [..]
+[DOWNLOADED] old-dep v1.0.0 [..]
+[CHECKING] old-dep v1.0.0
+[CHECKING] bar v2.0.0
+[CHECKING] bar v1.0.0
+[DOCUMENTING] bar v2.0.0
+[FINISHED] [..]
+[DOCUMENTING] foo v0.1.0 [..]
+",
+        )
+        .run();
+}
+
+#[cargo_test]
+fn collision_doc_host_target_feature_split() {
+    // Same dependency built twice due to different features.
+    //
+    // foo v0.1.0
+    // ├── common v1.0.0
+    // │   └── common-dep v1.0.0
+    // └── pm v0.1.0 (proc-macro)
+    //     └── common v1.0.0
+    //         └── common-dep v1.0.0
+    // [build-dependencies]
+    // └── common-dep v1.0.0
+    //
+    // Here `common` and `common-dep` are built twice. `common-dep` has
+    // different features for host versus target.
+    Package::new("common-dep", "1.0.0")
+        .feature("bdep-feat", &[])
+        .file(
+            "src/lib.rs",
+            r#"
+            /// Some doc
+            pub fn f() {}
+
+            /// Another doc
+            #[cfg(feature = "bdep-feat")]
+            pub fn bdep_func() {}
+            "#,
+        )
+        .publish();
+    Package::new("common", "1.0.0")
+        .dep("common-dep", "1.0")
+        .file(
+            "src/lib.rs",
+            r#"
+            /// Some doc
+            pub fn f() {}
+            "#,
+        )
+        .publish();
+    let p = project()
+        .file(
+            "Cargo.toml",
+            r#"
+            [package]
+            name = "foo"
+            version = "0.1.0"
+            resolver = "2"
+
+            [dependencies]
+            pm = { path = "pm" }
+            common = "1.0"
+
+            [build-dependencies]
+            common-dep = { version = "1.0", features = ["bdep-feat"] }
+            "#,
+        )
+        .file(
+            "src/lib.rs",
+            r#"
+            /// Some doc
+            pub fn f() {}
+            "#,
+        )
+        .file("build.rs", "fn main() {}")
+        .file(
+            "pm/Cargo.toml",
+            r#"
+            [package]
+            name = "pm"
+            version = "0.1.0"
+            edition = "2018"
+
+            [lib]
+            proc-macro = true
+
+            [dependencies]
+            common = "1.0"
+            "#,
+        )
+        .file(
+            "pm/src/lib.rs",
+            r#"
+            use proc_macro::TokenStream;
+
+            /// Some doc
+            #[proc_macro]
+            pub fn pm(_input: TokenStream) -> TokenStream {
+                "".parse().unwrap()
+            }
+            "#,
+        )
+        .build();
+
+    // No warnings, no duplicates, common and common-dep only documented once.
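+    // The host copies of `common` and `common-dep` (for the proc-macro and
+    // build script) are removed in favor of the target copies, so the
+    // `bdep-feat`-only item must not appear in the docs.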
+ p.cargo("doc") + // Cannot check full output due to https://github.com/rust-lang/cargo/issues/9076 + .with_stderr_does_not_contain("[WARNING][..]") + .run(); + + assert!(p.build_dir().join("doc/common_dep/fn.f.html").exists()); + assert!(!p + .build_dir() + .join("doc/common_dep/fn.bdep_func.html") + .exists()); + assert!(p.build_dir().join("doc/common/fn.f.html").exists()); + assert!(p.build_dir().join("doc/pm/macro.pm.html").exists()); + assert!(p.build_dir().join("doc/foo/fn.f.html").exists()); +} + +#[cargo_test] +fn collision_doc_profile_split() { + // Same dependency built twice due to different profile settings. + Package::new("common", "1.0.0").publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + pm = { path = "pm" } + common = "1.0" + + [profile.dev] + opt-level = 2 + "#, + ) + .file("src/lib.rs", "") + .file( + "pm/Cargo.toml", + r#" + [package] + name = "pm" + version = "0.1.0" + + [dependencies] + common = "1.0" + + [lib] + proc-macro = true + "#, + ) + .file("pm/src/lib.rs", "") + .build(); + + // Just to verify that common is normally built twice. + p.cargo("build -v") + .with_stderr( + "\ +[UPDATING] [..] +[DOWNLOADING] crates ... +[DOWNLOADED] common v1.0.0 [..] +[COMPILING] common v1.0.0 +[RUNNING] `rustc --crate-name common [..] +[RUNNING] `rustc --crate-name common [..] +[COMPILING] pm v0.1.0 [..] +[RUNNING] `rustc --crate-name pm [..] +[COMPILING] foo v0.1.0 [..] +[RUNNING] `rustc --crate-name foo [..] +[FINISHED] [..] +", + ) + .run(); + + // Should only document common once, no warnings. + p.cargo("doc") + .with_stderr_unordered( + "\ +[CHECKING] common v1.0.0 +[DOCUMENTING] common v1.0.0 +[DOCUMENTING] pm v0.1.0 [..] +[DOCUMENTING] foo v0.1.0 [..] +[FINISHED] [..] +", + ) + .run(); +} + +#[cargo_test] +fn collision_doc_sources() { + // Different sources with the same package. + Package::new("bar", "1.0.0").publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + bar2 = { path = "bar", package = "bar" } + "#, + ) + .file("src/lib.rs", "") + .file("bar/Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("bar/src/lib.rs", "") + .build(); + + p.cargo("doc") + .with_stderr_unordered( + "\ +[UPDATING] [..] +[DOWNLOADING] crates ... +[DOWNLOADED] bar v1.0.0 [..] +[WARNING] output filename collision. +The lib target `bar` in package `bar v1.0.0` has the same output filename as \ +the lib target `bar` in package `bar v1.0.0 ([..]/foo/bar)`. +Colliding filename is: [..]/foo/target/doc/bar/index.html +The targets should have unique names. +This is a known bug where multiple crates with the same name use +the same path; see . +[CHECKING] bar v1.0.0 [..] +[DOCUMENTING] bar v1.0.0 [..] +[DOCUMENTING] bar v1.0.0 +[CHECKING] bar v1.0.0 +[DOCUMENTING] foo v0.1.0 [..] +[FINISHED] [..] +", + ) + .run(); +} From c5e3f17f3b5d5aebe2725bd56019a6ecb05c0360 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Wed, 20 Jan 2021 08:27:36 -0800 Subject: [PATCH 3/3] Optimize removing unit_deps. 
---
 src/cargo/ops/cargo_compile.rs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/cargo/ops/cargo_compile.rs b/src/cargo/ops/cargo_compile.rs
index 3ff7eb46522..32cda133d86 100644
--- a/src/cargo/ops/cargo_compile.rs
+++ b/src/cargo/ops/cargo_compile.rs
@@ -1497,18 +1497,19 @@ fn remove_duplicate_doc(build_config: &BuildConfig, unit_graph: &mut UnitGraph)
                 .push(unit.clone());
         }
     }
+    // Keep track of units to remove so that they can be efficiently removed
+    // from the unit_deps.
+    let mut removed_units: HashSet<Unit> = HashSet::new();
     let mut remove = |units: Vec<Unit>, reason: &str| {
-        for unit in &units {
+        for unit in units {
             log::debug!(
                 "removing duplicate doc due to {} for package {} target `{}`",
                 reason,
                 unit.pkg,
                 unit.target.name()
             );
-            unit_graph.remove(unit);
-        }
-        for unit_deps in unit_graph.values_mut() {
-            unit_deps.retain(|unit_dep| !units.iter().any(|unit| *unit == unit_dep.unit));
+            unit_graph.remove(&unit);
+            removed_units.insert(unit);
         }
     };
     // Iterate over the duplicates and try to remove them from unit_graph.
@@ -1566,4 +1567,8 @@ fn remove_duplicate_doc(build_config: &BuildConfig, unit_graph: &mut UnitGraph)
         // Are there other heuristics to remove duplicates that would make
         // sense? Maybe prefer path sources over all others?
     }
+    // Also remove units from the unit_deps so there aren't any dangling edges.
+    for unit_deps in unit_graph.values_mut() {
+        unit_deps.retain(|unit_dep| !removed_units.contains(&unit_dep.unit));
+    }
 }