Skip to content

Commit e342295

Browse files
authored
Rollup merge of rust-lang#128353 - ferrocene:jonathanpallant/add-dependencies-to-copyright-file, r=Kobzol
Change generate-copyright to generate HTML, with cargo dependencies included. `x.py run generate-copyright` now produces `build/COPYRIGHT.html`. This includes a new format for in-tree dependencies, and also adds out-of-tree cargo dependencies. After consulting expert opinion, I have elected to include every top-level: * `*NOTICE*` * `*AUTHOR*` * `*LICENSE*` * `*LICENCE*`, and * `*COPYRIGHT*` file I can find - case-insensitive. This is because the cargo package metadata's `author` field is not a list of copyright holders and does not meet the requirements of the Apache-2.0 license (which says you must include a NOTICE file with the binary if one was supplied by the author) nor the MIT license (which says you must include 'the above copyright notice'). I believe it would be appropriate to include this file with every Rust release, in order to do an even better job of appropriately recognising the efforts of the authors of the first-party and third-party libraries we are using here. The output includes something like 524 copies of the Apache-2.0 text because they are not all identical. I think I count about 50 different variations by shasum - some differ in whitespace, while some have the boilerplate block at the bottom erroneously modified (don't modify the copy in the license, modify the copy you paste into your own source code!). Running `gzip` on the HTML file largely makes this problem go away, and the average browser is far happier with a ~6 MiB HTML file than the average Markdown viewer is with a ~6 MiB markdown file. But, if someone wants to, they could submit a follow-up which de-dups the license text files and adds back-links to earlier identical copies (for some value of 'identical copy').
```console $ xpy run generate-copyright $ cd build $ gzip -c COPYRIGHT.html > COPYRIGHT.gz $ xz -c COPYRIGHT.html > COPYRIGHT.xz $ ls -lh COPYRIGHT.* -rw-r--r-- 1 jonathan staff 241K 29 Jul 17:19 COPYRIGHT.gz -rw-r--r--@ 1 jonathan staff 6.6M 29 Jul 11:30 COPYRIGHT.html -rw-r--r-- 1 jonathan staff 59K 29 Jul 17:19 COPYRIGHT.xz ``` Here's an example [COPYRIGHT.gz](https://github.com/user-attachments/files/16416147/COPYRIGHT.gz).
2 parents 2ee9678 + 99579f3 commit e342295

File tree

10 files changed

+384
-56
lines changed

10 files changed

+384
-56
lines changed

Cargo.lock

+6
Original file line numberDiff line numberDiff line change
@@ -1406,8 +1406,11 @@ name = "generate-copyright"
14061406
version = "0.1.0"
14071407
dependencies = [
14081408
"anyhow",
1409+
"cargo_metadata 0.18.1",
1410+
"rinja",
14091411
"serde",
14101412
"serde_json",
1413+
"thiserror",
14111414
]
14121415

14131416
[[package]]
@@ -3094,7 +3097,10 @@ version = "0.3.0"
30943097
source = "registry+https://github.com/rust-lang/crates.io-index"
30953098
checksum = "6d3762e3740cdbf2fd2be465cc2c26d643ad17353cc2e0223d211c1b096118bd"
30963099
dependencies = [
3100+
"humansize",
30973101
"itoa",
3102+
"num-traits",
3103+
"percent-encoding",
30983104
"rinja_derive",
30993105
]
31003106

REUSE.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ SPDX-License-Identifier = "MIT OR Apache-2.0"
163163
path = "src/llvm-project/**"
164164
precedence = "override"
165165
SPDX-FileCopyrightText = [
166-
"2003-2019 by the contributors listed in [CREDITS.TXT](https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
166+
"2003-2019 by the contributors listed in CREDITS.TXT (https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
167167
"2010 Apple Inc",
168168
"2003-2019 University of Illinois at Urbana-Champaign.",
169169
]

src/bootstrap/src/core/build_steps/run.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,13 @@ impl Step for GenerateCopyright {
212212
let license_metadata = builder.ensure(CollectLicenseMetadata);
213213

214214
// Temporary location, it will be moved to the proper one once it's accurate.
215-
let dest = builder.out.join("COPYRIGHT.md");
215+
let dest = builder.out.join("COPYRIGHT.html");
216216

217217
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
218218
cmd.env("LICENSE_METADATA", &license_metadata);
219219
cmd.env("DEST", &dest);
220+
cmd.env("OUT_DIR", &builder.out);
221+
cmd.env("CARGO", &builder.initial_cargo);
220222
cmd.run(builder);
221223

222224
dest

src/tools/collect-license-metadata/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
name = "collect-license-metadata"
33
version = "0.1.0"
44
edition = "2021"
5+
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
6+
license = "MIT OR Apache-2.0"
57

68
[dependencies]
79
anyhow = "1.0.65"

src/tools/collect-license-metadata/src/main.rs

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ use anyhow::Error;
88

99
use crate::licenses::LicensesInterner;
1010

11+
/// The entry point to the binary.
12+
///
13+
/// You should probably let `bootstrap` execute this program instead of running it directly.
14+
///
15+
/// Run `x.py run collect-license-metadata`
1116
fn main() -> Result<(), Error> {
1217
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
1318
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();

src/tools/generate-copyright/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@
22
name = "generate-copyright"
33
version = "0.1.0"
44
edition = "2021"
5+
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"
56

67
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
78

89
[dependencies]
910
anyhow = "1.0.65"
11+
cargo_metadata = "0.18.1"
12+
rinja = "0.3.0"
1013
serde = { version = "1.0.147", features = ["derive"] }
1114
serde_json = "1.0.85"
15+
thiserror = "1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
//! Gets metadata about a workspace from Cargo
2+
3+
use std::collections::BTreeMap;
4+
use std::ffi::OsStr;
5+
use std::path::{Path, PathBuf};
6+
7+
/// Describes how this module can fail
8+
#[derive(Debug, thiserror::Error)]
9+
pub enum Error {
10+
#[error("I/O Error: {0:?}")]
11+
Io(#[from] std::io::Error),
12+
#[error("Failed get output from cargo-metadata: {0:?}")]
13+
GettingMetadata(#[from] cargo_metadata::Error),
14+
#[error("Failed to run cargo vendor: {0:?}")]
15+
LaunchingVendor(std::io::Error),
16+
#[error("Failed to complete cargo vendor")]
17+
RunningVendor,
18+
#[error("Bad path {0:?} whilst scraping files")]
19+
Scraping(PathBuf),
20+
}
21+
22+
/// Uniquely describes a package on crates.io
///
/// Used as the key of the map returned by `get_metadata`. The derived
/// ordering compares `name` first and then `version`; both are compared as
/// plain strings, so versions are not ordered numerically.
23+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
24+
pub struct Package {
25+
/// The name of the package
26+
pub name: String,
27+
/// The version number, stored in its string form.
///
/// `load_important_files` joins this to `name` as `{name}-{version}` to
/// locate the package's directory inside the vendor directory.
28+
pub version: String,
29+
}
30+
31+
/// Extra data about a package
///
/// Stored as the value alongside a `Package` key in the map returned by
/// `get_metadata`.
32+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
33+
pub struct PackageMetadata {
34+
/// The license it is under
///
/// `get_metadata` stores the literal string `Unspecified` here when the
/// package metadata carries no license.
35+
pub license: String,
36+
/// The list of authors from the package metadata
37+
pub authors: Vec<String>,
38+
/// A list of important files from the package, with their contents.
39+
///
40+
/// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.
///
/// The key is the file name (or `directory/file` for a file found directly
/// inside a matching directory) and the value is the file's text.
41+
pub notices: BTreeMap<String, String>,
42+
/// If this is true, this dep is in the Rust Standard Library
///
/// `get_metadata` always initialises this to `None`.
43+
pub is_in_libstd: Option<bool>,
44+
}
45+
46+
/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.
47+
///
48+
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
49+
/// grab the license files.
50+
///
51+
/// Any dependency with a path beginning with `root_path` is ignored, as we
52+
/// assume `reuse` has covered it already.
53+
pub fn get_metadata_and_notices(
54+
cargo: &Path,
55+
dest: &Path,
56+
root_path: &Path,
57+
manifest_paths: &[&Path],
58+
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
59+
let mut output = get_metadata(cargo, root_path, manifest_paths)?;
60+
61+
// Now do a cargo-vendor and grab everything
62+
let vendor_path = dest.join("vendor");
63+
println!("Vendoring deps into {}...", vendor_path.display());
64+
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
65+
66+
// Now for each dependency we found, go and grab any important looking files
67+
for (package, metadata) in output.iter_mut() {
68+
load_important_files(package, metadata, &vendor_path)?;
69+
}
70+
71+
Ok(output)
72+
}
73+
74+
/// Use `cargo metadata` to get a list of dependencies and their license data.
75+
///
76+
/// Any dependency with a path beginning with `root_path` is ignored, as we
77+
/// assume `reuse` has covered it already.
78+
pub fn get_metadata(
79+
cargo: &Path,
80+
root_path: &Path,
81+
manifest_paths: &[&Path],
82+
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
83+
let mut output = BTreeMap::new();
84+
// Look at the metadata for each manifest
85+
for manifest_path in manifest_paths {
86+
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
87+
panic!("cargo_manifest::get requires a path to a Cargo.toml file");
88+
}
89+
let metadata = cargo_metadata::MetadataCommand::new()
90+
.cargo_path(cargo)
91+
.env("RUSTC_BOOTSTRAP", "1")
92+
.manifest_path(manifest_path)
93+
.exec()?;
94+
for package in metadata.packages {
95+
let manifest_path = package.manifest_path.as_path();
96+
if manifest_path.starts_with(root_path) {
97+
// it's an in-tree dependency and reuse covers it
98+
continue;
99+
}
100+
// otherwise it's an out-of-tree dependency
101+
let package_id = Package { name: package.name, version: package.version.to_string() };
102+
output.insert(
103+
package_id,
104+
PackageMetadata {
105+
license: package.license.unwrap_or_else(|| String::from("Unspecified")),
106+
authors: package.authors,
107+
notices: BTreeMap::new(),
108+
is_in_libstd: None,
109+
},
110+
);
111+
}
112+
}
113+
114+
Ok(output)
115+
}
116+
117+
/// Run cargo-vendor, fetching into the given dir
118+
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
119+
let mut vendor_command = std::process::Command::new(cargo);
120+
vendor_command.env("RUSTC_BOOTSTRAP", "1");
121+
vendor_command.arg("vendor");
122+
vendor_command.arg("--quiet");
123+
vendor_command.arg("--versioned-dirs");
124+
for manifest_path in manifest_paths {
125+
vendor_command.arg("-s");
126+
vendor_command.arg(manifest_path);
127+
}
128+
vendor_command.arg(dest);
129+
130+
let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;
131+
132+
if !vendor_status.success() {
133+
return Err(Error::RunningVendor);
134+
}
135+
136+
Ok(())
137+
}
138+
139+
/// Add important files off disk into this dependency.
140+
///
141+
/// Maybe one-day Cargo.toml will contain enough information that we don't need
142+
/// to do this manual scraping.
143+
fn load_important_files(
144+
package: &Package,
145+
dep: &mut PackageMetadata,
146+
vendor_root: &Path,
147+
) -> Result<(), Error> {
148+
let name_version = format!("{}-{}", package.name, package.version);
149+
println!("Scraping notices for {}...", name_version);
150+
let dep_vendor_path = vendor_root.join(name_version);
151+
for entry in std::fs::read_dir(dep_vendor_path)? {
152+
let entry = entry?;
153+
let metadata = entry.metadata()?;
154+
let path = entry.path();
155+
let Some(filename) = path.file_name() else {
156+
return Err(Error::Scraping(path));
157+
};
158+
let lc_filename = filename.to_ascii_lowercase();
159+
let lc_filename_str = lc_filename.to_string_lossy();
160+
let mut keep = false;
161+
for m in ["copyright", "licence", "license", "author", "notice"] {
162+
if lc_filename_str.contains(m) {
163+
keep = true;
164+
break;
165+
}
166+
}
167+
if keep {
168+
if metadata.is_dir() {
169+
for inner_entry in std::fs::read_dir(entry.path())? {
170+
let inner_entry = inner_entry?;
171+
if inner_entry.metadata()?.is_file() {
172+
let inner_filename = inner_entry.file_name();
173+
let inner_filename_str = inner_filename.to_string_lossy();
174+
let qualified_filename =
175+
format!("{}/{}", lc_filename_str, inner_filename_str);
176+
println!("Scraping {}", qualified_filename);
177+
dep.notices.insert(
178+
qualified_filename.to_string(),
179+
std::fs::read_to_string(inner_entry.path())?,
180+
);
181+
}
182+
}
183+
} else if metadata.is_file() {
184+
let filename = filename.to_string_lossy();
185+
println!("Scraping {}", filename);
186+
dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?);
187+
}
188+
}
189+
}
190+
Ok(())
191+
}

0 commit comments

Comments
 (0)