From c3a240608b6600c8a5ccc89cdcb823836bea0530 Mon Sep 17 00:00:00 2001
From: xFrednet
Date: Mon, 8 Jul 2024 16:39:35 +0200
Subject: [PATCH] Lintcheck: Refactor structs and only take one version per crate

---
 lintcheck/lintcheck_crates.toml |  52 ++--
 lintcheck/src/input.rs          | 288 +++++++++++++++++
 lintcheck/src/main.rs           | 528 +-------------------------------
 lintcheck/src/output.rs         | 235 ++++++++++++++
 lintcheck/src/popular_crates.rs |   2 +-
 lintcheck/src/recursive.rs      |   3 +-
 6 files changed, 567 insertions(+), 541 deletions(-)
 create mode 100644 lintcheck/src/input.rs
 create mode 100644 lintcheck/src/output.rs

diff --git a/lintcheck/lintcheck_crates.toml b/lintcheck/lintcheck_crates.toml
index 52f7fee47b616..ff608e6f93592 100644
--- a/lintcheck/lintcheck_crates.toml
+++ b/lintcheck/lintcheck_crates.toml
@@ -1,38 +1,38 @@
 [crates]
 # some of these are from cargotest
-cargo = {name = "cargo", versions = ['0.64.0']}
-iron = {name = "iron", versions = ['0.6.1']}
-ripgrep = {name = "ripgrep", versions = ['12.1.1']}
-xsv = {name = "xsv", versions = ['0.13.0']}
+cargo = {name = "cargo", version = '0.64.0'}
+iron = {name = "iron", version = '0.6.1'}
+ripgrep = {name = "ripgrep", version = '12.1.1'}
+xsv = {name = "xsv", version = '0.13.0'}
 # commented out because of 173K clippy::match_same_arms msgs in language_type.rs
-#tokei = { name = "tokei", versions = ['12.0.4']}
-rayon = {name = "rayon", versions = ['1.5.0']}
-serde = {name = "serde", versions = ['1.0.118']}
+#tokei = { name = "tokei", version = '12.0.4'}
+rayon = {name = "rayon", version = '1.5.0'}
+serde = {name = "serde", version = '1.0.118'}
 # top 10 crates.io dls
-bitflags = {name = "bitflags", versions = ['1.2.1']}
+bitflags = {name = "bitflags", version = '1.2.1'}
 # crash = {name = "clippy_crash", path = "/tmp/clippy_crash"}
-libc = {name = "libc", versions = ['0.2.81']}
-log = {name = "log", versions = ['0.4.11']}
-proc-macro2 = {name = "proc-macro2", versions = ['1.0.24']}
-quote = {name = "quote", versions = ['1.0.7']}
-rand = {name = "rand", versions = ['0.7.3']}
-rand_core = {name = "rand_core", versions = ['0.6.0']}
-regex = {name = "regex", versions = ['1.3.2']}
-syn = {name = "syn", versions = ['1.0.54']}
-unicode-xid = {name = "unicode-xid", versions = ['0.2.1']}
+libc = {name = "libc", version = '0.2.81'}
+log = {name = "log", version = '0.4.11'}
+proc-macro2 = {name = "proc-macro2", version = '1.0.24'}
+quote = {name = "quote", version = '1.0.7'}
+rand = {name = "rand", version = '0.7.3'}
+rand_core = {name = "rand_core", version = '0.6.0'}
+regex = {name = "regex", version = '1.3.2'}
+syn = {name = "syn", version = '1.0.54'}
+unicode-xid = {name = "unicode-xid", version = '0.2.1'}
 # some more of dtolnays crates
-anyhow = {name = "anyhow", versions = ['1.0.38']}
-async-trait = {name = "async-trait", versions = ['0.1.42']}
-cxx = {name = "cxx", versions = ['1.0.32']}
-ryu = {name = "ryu", versions = ['1.0.5']}
-serde_yaml = {name = "serde_yaml", versions = ['0.8.17']}
-thiserror = {name = "thiserror", versions = ['1.0.24']}
+anyhow = {name = "anyhow", version = '1.0.38'}
+async-trait = {name = "async-trait", version = '0.1.42'}
+cxx = {name = "cxx", version = '1.0.32'}
+ryu = {name = "ryu", version = '1.0.5'}
+serde_yaml = {name = "serde_yaml", version = '0.8.17'}
+thiserror = {name = "thiserror", version = '1.0.24'}
 # some embark crates, there are other interesting crates but
 # unfortunately adding them increases lintcheck runtime drastically
-cfg-expr = {name = "cfg-expr", versions = ['0.7.1']}
+cfg-expr = {name = "cfg-expr", version = '0.7.1'}
 puffin = {name = "puffin", git_url = "https://github.com/EmbarkStudios/puffin", git_hash = "02dd4a3"}
-rpmalloc = {name = "rpmalloc", versions = ['0.2.0']}
-tame-oidc = {name = "tame-oidc", versions = ['0.1.0']}
+rpmalloc = {name = "rpmalloc", version = '0.2.0'}
+tame-oidc = {name = "tame-oidc", version = '0.1.0'}
 
 [recursive]
 ignore = [
diff --git a/lintcheck/src/input.rs b/lintcheck/src/input.rs
new file mode 100644
index 0000000000000..3d034391c280f
--- /dev/null
+++ b/lintcheck/src/input.rs
@@ -0,0 +1,288 @@
+use std::collections::{HashMap, HashSet};
+use std::fs::{self};
+use std::io::{self, ErrorKind};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::time::Duration;
+
+use serde::Deserialize;
+use walkdir::{DirEntry, WalkDir};
+
+use crate::{Crate, LINTCHECK_DOWNLOADS, LINTCHECK_SOURCES};
+
+/// List of sources to check, loaded from a .toml file
+#[derive(Debug, Deserialize)]
+pub struct SourceList {
+    crates: HashMap<String, TomlCrate>,
+    #[serde(default)]
+    recursive: RecursiveOptions,
+}
+
+#[derive(Debug, Deserialize, Default)]
+pub struct RecursiveOptions {
+    pub ignore: HashSet<String>,
+}
+
+/// A crate source stored inside the .toml
+/// will be translated into one of the `CrateSource` variants
+#[derive(Debug, Deserialize)]
+struct TomlCrate {
+    name: String,
+    version: Option<String>,
+    git_url: Option<String>,
+    git_hash: Option<String>,
+    path: Option<String>,
+    options: Option<Vec<String>>,
+}
+
+/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
+/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
+#[derive(Debug, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
+pub enum CrateSource {
+    CratesIo {
+        name: String,
+        version: String,
+        options: Option<Vec<String>>,
+    },
+    Git {
+        name: String,
+        url: String,
+        commit: String,
+        options: Option<Vec<String>>,
+    },
+    Path {
+        name: String,
+        path: PathBuf,
+        options: Option<Vec<String>>,
+    },
+}
+
+/// Read a `lintcheck_crates.toml` file
+pub fn read_crates(toml_path: &Path) -> (Vec<CrateSource>, RecursiveOptions) {
+    let toml_content: String =
+        fs::read_to_string(toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
+    let crate_list: SourceList =
+        toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{e}", toml_path.display()));
+    // parse the hashmap of the toml file into a list of crates
+    let tomlcrates: Vec<TomlCrate> = crate_list.crates.into_values().collect();
+
+    // flatten TomlCrates into CrateSources (one TomlCrates may represent several versions of a crate =>
+    // multiple Cratesources)
+    let mut crate_sources = Vec::new();
+    for tk in tomlcrates {
+        if let Some(ref path) = tk.path {
+            crate_sources.push(CrateSource::Path {
+                name: tk.name.clone(),
+                path: PathBuf::from(path),
+                options: tk.options.clone(),
+            });
+        } else if let Some(ref version) = tk.version {
+            crate_sources.push(CrateSource::CratesIo {
+                name: tk.name.clone(),
+                version: version.to_string(),
+                options: tk.options.clone(),
+            });
+        } else if tk.git_url.is_some() && tk.git_hash.is_some() {
+            // otherwise, we should have a git source
+            crate_sources.push(CrateSource::Git {
+                name: tk.name.clone(),
+                url: tk.git_url.clone().unwrap(),
+                commit: tk.git_hash.clone().unwrap(),
+                options: tk.options.clone(),
+            });
+        } else {
+            panic!("Invalid crate source: {tk:?}");
+        }
+
+        // if we have a version as well as a git data OR only one git data, something is funky
+        if tk.version.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
+            || tk.git_hash.is_some() != tk.git_url.is_some()
+        {
+            eprintln!("tomlkrate: {tk:?}");
+            assert_eq!(
+                tk.git_hash.is_some(),
+                tk.git_url.is_some(),
+                "Error: Encountered TomlCrate with only one of git_hash and git_url!"
+            );
+            assert!(
+                tk.path.is_none() || (tk.git_hash.is_none() && tk.version.is_none()),
+                "Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields"
+            );
+            unreachable!("Failed to translate TomlCrate into CrateSource!");
+        }
+    }
+    // sort the crates
+    crate_sources.sort();
+
+    (crate_sources, crate_list.recursive)
+}
+
+impl CrateSource {
+    /// Makes the sources available on the disk for clippy to check.
+    /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
+    /// copies a local folder
+    #[expect(clippy::too_many_lines)]
+    pub fn download_and_extract(&self) -> Crate {
+        #[allow(clippy::result_large_err)]
+        fn get(path: &str) -> Result<ureq::Response, ureq::Error> {
+            const MAX_RETRIES: u8 = 4;
+            let mut retries = 0;
+            loop {
+                match ureq::get(path).call() {
+                    Ok(res) => return Ok(res),
+                    Err(e) if retries >= MAX_RETRIES => return Err(e),
+                    Err(ureq::Error::Transport(e)) => eprintln!("Error: {e}"),
+                    Err(e) => return Err(e),
+                }
+                eprintln!("retrying in {retries} seconds...");
+                std::thread::sleep(Duration::from_secs(u64::from(retries)));
+                retries += 1;
+            }
+        }
+        match self {
+            CrateSource::CratesIo { name, version, options } => {
+                let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
+                let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);
+
+                // url to download the crate from crates.io
+                let url = format!("https://crates.io/api/v1/crates/{name}/{version}/download");
+                println!("Downloading and extracting {name} {version} from {url}");
+                create_dirs(&krate_download_dir, &extract_dir);
+
+                let krate_file_path = krate_download_dir.join(format!("{name}-{version}.crate.tar.gz"));
+                // don't download/extract if we already have done so
+                if !krate_file_path.is_file() {
+                    // create a file path to download and write the crate data into
+                    let mut krate_dest = fs::File::create(&krate_file_path).unwrap();
+                    let mut krate_req = get(&url).unwrap().into_reader();
+                    // copy the crate into the file
+                    io::copy(&mut krate_req, &mut krate_dest).unwrap();
+
+                    // unzip the tarball
+                    let ungz_tar = flate2::read::GzDecoder::new(fs::File::open(&krate_file_path).unwrap());
+                    // extract the tar archive
+                    let mut archive = tar::Archive::new(ungz_tar);
+                    archive.unpack(&extract_dir).expect("Failed to extract!");
+                }
+                // crate is extracted, return a new Krate object which contains the path to the extracted
+                // sources that clippy can check
+                Crate {
+                    version: version.clone(),
+                    name: name.clone(),
+                    path: extract_dir.join(format!("{name}-{version}/")),
+                    options: options.clone(),
+                }
+            },
+            CrateSource::Git {
+                name,
+                url,
+                commit,
+                options,
+            } => {
+                let repo_path = {
+                    let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
+                    // add a -git suffix in case we have the same crate from crates.io and a git repo
+                    repo_path.push(format!("{name}-git"));
+                    repo_path
+                };
+                // clone the repo if we have not done so
+                if !repo_path.is_dir() {
+                    println!("Cloning {url} and checking out {commit}");
+                    if !Command::new("git")
+                        .arg("clone")
+                        .arg(url)
+                        .arg(&repo_path)
+                        .status()
+                        .expect("Failed to clone git repo!")
+                        .success()
+                    {
+                        eprintln!("Failed to clone {url} into {}", repo_path.display());
+                    }
+                }
+                // check out the commit/branch/whatever
+                if !Command::new("git")
+                    .args(["-c", "advice.detachedHead=false"])
+                    .arg("checkout")
+                    .arg(commit)
+                    .current_dir(&repo_path)
+                    .status()
+                    .expect("Failed to check out commit")
+                    .success()
+                {
+                    eprintln!("Failed to checkout {commit} of repo at {}", repo_path.display());
+                }
+
+                Crate {
+                    version: commit.clone(),
+                    name: name.clone(),
+                    path: repo_path,
+                    options: options.clone(),
+                }
+            },
+            CrateSource::Path { name, path, options } => {
+                fn is_cache_dir(entry: &DirEntry) -> bool {
+                    fs::read(entry.path().join("CACHEDIR.TAG"))
+                        .map(|x| x.starts_with(b"Signature: 8a477f597d28d172789f06886806bc55"))
+                        .unwrap_or(false)
+                }
+
+                // copy path into the dest_crate_root but skip directories that contain a CACHEDIR.TAG file.
+                // The target/ directory contains a CACHEDIR.TAG file so it is the most commonly skipped directory
+                // as a result of this filter.
+                let dest_crate_root = PathBuf::from(LINTCHECK_SOURCES).join(name);
+                if dest_crate_root.exists() {
+                    println!("Deleting existing directory at {dest_crate_root:?}");
+                    fs::remove_dir_all(&dest_crate_root).unwrap();
+                }
+
+                println!("Copying {path:?} to {dest_crate_root:?}");
+
+                for entry in WalkDir::new(path).into_iter().filter_entry(|e| !is_cache_dir(e)) {
+                    let entry = entry.unwrap();
+                    let entry_path = entry.path();
+                    let relative_entry_path = entry_path.strip_prefix(path).unwrap();
+                    let dest_path = dest_crate_root.join(relative_entry_path);
+                    let metadata = entry_path.symlink_metadata().unwrap();
+
+                    if metadata.is_dir() {
+                        fs::create_dir(dest_path).unwrap();
+                    } else if metadata.is_file() {
+                        fs::copy(entry_path, dest_path).unwrap();
+                    }
+                }
+
+                Crate {
+                    version: String::from("local"),
+                    name: name.clone(),
+                    path: dest_crate_root,
+                    options: options.clone(),
+                }
+            },
+        }
+    }
+}
+
+/// Create necessary directories to run the lintcheck tool.
+///
+/// # Panics
+///
+/// This function panics if creating one of the dirs fails.
+fn create_dirs(krate_download_dir: &Path, extract_dir: &Path) {
+    fs::create_dir("target/lintcheck/").unwrap_or_else(|err| {
+        assert_eq!(
+            err.kind(),
+            ErrorKind::AlreadyExists,
+            "cannot create lintcheck target dir"
+        );
+    });
+    fs::create_dir(krate_download_dir).unwrap_or_else(|err| {
+        assert_eq!(err.kind(), ErrorKind::AlreadyExists, "cannot create crate download dir");
+    });
+    fs::create_dir(extract_dir).unwrap_or_else(|err| {
+        assert_eq!(
+            err.kind(),
+            ErrorKind::AlreadyExists,
+            "cannot create crate extraction dir"
+        );
+    });
+}
diff --git a/lintcheck/src/main.rs b/lintcheck/src/main.rs
index 26a67beb4427b..e37ffab13ac89 100644
--- a/lintcheck/src/main.rs
+++ b/lintcheck/src/main.rs
@@ -13,84 +13,38 @@
     unused_lifetimes,
     unused_qualifications
 )]
-#![allow(clippy::collapsible_else_if, clippy::needless_borrows_for_generic_args)]
+#![allow(
+    clippy::collapsible_else_if,
+    clippy::needless_borrows_for_generic_args,
+    clippy::module_name_repetitions
+)]
 
 mod config;
 mod driver;
+mod input;
 mod json;
+mod output;
 mod popular_crates;
 mod recursive;
 
 use crate::config::{Commands, LintcheckConfig, OutputFormat};
 use crate::recursive::LintcheckServer;
 
-use std::collections::{HashMap, HashSet};
 use std::env::consts::EXE_SUFFIX;
-use std::fmt::{self, Display, Write as _};
-use std::hash::Hash;
-use std::io::{self, ErrorKind};
+use std::io::{self};
 use std::path::{Path, PathBuf};
-use std::process::{Command, ExitStatus, Stdio};
+use std::process::{Command, Stdio};
 use std::sync::atomic::{AtomicUsize, Ordering};
-use std::time::Duration;
-use std::{env, fs, thread};
+use std::{env, fs};
 
-use cargo_metadata::diagnostic::{Diagnostic, DiagnosticSpan};
 use cargo_metadata::Message;
+use input::{read_crates, CrateSource};
+use output::{ClippyCheckOutput, ClippyWarning, RustcIce};
 use rayon::prelude::*;
-use serde::Deserialize;
-use walkdir::{DirEntry, WalkDir};
 
 const LINTCHECK_DOWNLOADS: &str = "target/lintcheck/downloads";
 const LINTCHECK_SOURCES: &str = "target/lintcheck/sources";
 
-/// List of sources to check, loaded from a .toml file
-#[derive(Debug, Deserialize)]
-struct SourceList {
-    crates: HashMap<String, TomlCrate>,
-    #[serde(default)]
-    recursive: RecursiveOptions,
-}
-
-#[derive(Debug, Deserialize, Default)]
-struct RecursiveOptions {
-    ignore: HashSet<String>,
-}
-
-/// A crate source stored inside the .toml
-/// will be translated into on one of the `CrateSource` variants
-#[derive(Debug, Deserialize)]
-struct TomlCrate {
-    name: String,
-    versions: Option<Vec<String>>,
-    git_url: Option<String>,
-    git_hash: Option<String>,
-    path: Option<String>,
-    options: Option<Vec<String>>,
-}
-
-/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
-/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
-#[derive(Debug, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
-enum CrateSource {
-    CratesIo {
-        name: String,
-        version: String,
-        options: Option<Vec<String>>,
-    },
-    Git {
-        name: String,
-        url: String,
-        commit: String,
-        options: Option<Vec<String>>,
-    },
-    Path {
-        name: String,
-        path: PathBuf,
-        options: Option<Vec<String>>,
-    },
-}
-
 /// Represents the actual source code of a crate that we ran "cargo clippy" on
 #[derive(Debug)]
 struct Crate {
@@ -101,241 +55,6 @@ struct Crate {
     options: Option<Vec<String>>,
 }
 
-/// A single emitted output from clippy being executed on a crate. It may either be a
-/// `ClippyWarning`, or a `RustcIce` caused by a panic within clippy. A crate may have many
-/// `ClippyWarning`s but a maximum of one `RustcIce` (at which point clippy halts execution).
-#[derive(Debug)]
-enum ClippyCheckOutput {
-    ClippyWarning(ClippyWarning),
-    RustcIce(RustcIce),
-}
-
-#[derive(Debug)]
-struct RustcIce {
-    pub crate_name: String,
-    pub ice_content: String,
-}
-
-impl Display for RustcIce {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            "{}:\n{}\n========================================\n",
-            self.crate_name, self.ice_content
-        )
-    }
-}
-
-impl RustcIce {
-    pub fn from_stderr_and_status(crate_name: &str, status: ExitStatus, stderr: &str) -> Option<Self> {
-        if status.code().unwrap_or(0) == 101
-        /* ice exit status */
-        {
-            Some(Self {
-                crate_name: crate_name.to_owned(),
-                ice_content: stderr.to_owned(),
-            })
-        } else {
-            None
-        }
-    }
-}
-
-/// A single warning that clippy issued while checking a `Crate`
-#[derive(Debug)]
-struct ClippyWarning {
-    lint: String,
-    diag: Diagnostic,
-}
-
-#[allow(unused)]
-impl ClippyWarning {
-    fn new(mut diag: Diagnostic) -> Option<Self> {
-        let lint = diag.code.clone()?.code;
-        if !(lint.contains("clippy") || diag.message.contains("clippy"))
-            || diag.message.contains("could not read cargo metadata")
-        {
-            return None;
-        }
-
-        // --recursive bypasses cargo so we have to strip the rendered output ourselves
-        let rendered = diag.rendered.as_mut().unwrap();
-        *rendered = strip_ansi_escapes::strip_str(&rendered);
-
-        Some(Self { lint, diag })
-    }
-
-    fn span(&self) -> &DiagnosticSpan {
-        self.diag.spans.iter().find(|span| span.is_primary).unwrap()
-    }
-
-    fn to_output(&self, format: OutputFormat) -> String {
-        let span = self.span();
-        let mut file = span.file_name.clone();
-        let file_with_pos = format!("{file}:{}:{}", span.line_start, span.line_end);
-        match format {
-            OutputFormat::Text => format!("{file_with_pos} {} \"{}\"\n", self.lint, self.diag.message),
-            OutputFormat::Markdown => {
-                if file.starts_with("target") {
-                    file.insert_str(0, "../");
-                }
-
-                let mut output = String::from("| ");
-                write!(output, "[`{file_with_pos}`]({file}#L{})", span.line_start).unwrap();
-                write!(output, r#" | `{:<50}` | "{}" |"#, self.lint, self.diag.message).unwrap();
-                output.push('\n');
-                output
-            },
-            OutputFormat::Json => unreachable!("JSON output is handled via serde"),
-        }
-    }
-}
-
-#[allow(clippy::result_large_err)]
-fn get(path: &str) -> Result<ureq::Response, ureq::Error> {
-    const MAX_RETRIES: u8 = 4;
-    let mut retries = 0;
-    loop {
-        match ureq::get(path).call() {
-            Ok(res) => return Ok(res),
-            Err(e) if retries >= MAX_RETRIES => return Err(e),
-            Err(ureq::Error::Transport(e)) => eprintln!("Error: {e}"),
-            Err(e) => return Err(e),
-        }
-        eprintln!("retrying in {retries} seconds...");
-        thread::sleep(Duration::from_secs(u64::from(retries)));
-        retries += 1;
-    }
-}
-
-impl CrateSource {
-    /// Makes the sources available on the disk for clippy to check.
-    /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
-    /// copies a local folder
-    fn download_and_extract(&self) -> Crate {
-        match self {
-            CrateSource::CratesIo { name, version, options } => {
-                let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
-                let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);
-
-                // url to download the crate from crates.io
-                let url = format!("https://crates.io/api/v1/crates/{name}/{version}/download");
-                println!("Downloading and extracting {name} {version} from {url}");
-                create_dirs(&krate_download_dir, &extract_dir);
-
-                let krate_file_path = krate_download_dir.join(format!("{name}-{version}.crate.tar.gz"));
-                // don't download/extract if we already have done so
-                if !krate_file_path.is_file() {
-                    // create a file path to download and write the crate data into
-                    let mut krate_dest = fs::File::create(&krate_file_path).unwrap();
-                    let mut krate_req = get(&url).unwrap().into_reader();
-                    // copy the crate into the file
-                    io::copy(&mut krate_req, &mut krate_dest).unwrap();
-
-                    // unzip the tarball
-                    let ungz_tar = flate2::read::GzDecoder::new(fs::File::open(&krate_file_path).unwrap());
-                    // extract the tar archive
-                    let mut archive = tar::Archive::new(ungz_tar);
-                    archive.unpack(&extract_dir).expect("Failed to extract!");
-                }
-                // crate is extracted, return a new Krate object which contains the path to the extracted
-                // sources that clippy can check
-                Crate {
-                    version: version.clone(),
-                    name: name.clone(),
-                    path: extract_dir.join(format!("{name}-{version}/")),
-                    options: options.clone(),
-                }
-            },
-            CrateSource::Git {
-                name,
-                url,
-                commit,
-                options,
-            } => {
-                let repo_path = {
-                    let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
-                    // add a -git suffix in case we have the same crate from crates.io and a git repo
-                    repo_path.push(format!("{name}-git"));
-                    repo_path
-                };
-                // clone the repo if we have not done so
-                if !repo_path.is_dir() {
-                    println!("Cloning {url} and checking out {commit}");
-                    if !Command::new("git")
-                        .arg("clone")
-                        .arg(url)
-                        .arg(&repo_path)
-                        .status()
-                        .expect("Failed to clone git repo!")
-                        .success()
-                    {
-                        eprintln!("Failed to clone {url} into {}", repo_path.display());
-                    }
-                }
-                // check out the commit/branch/whatever
-                if !Command::new("git")
-                    .args(["-c", "advice.detachedHead=false"])
-                    .arg("checkout")
-                    .arg(commit)
-                    .current_dir(&repo_path)
-                    .status()
-                    .expect("Failed to check out commit")
-                    .success()
-                {
-                    eprintln!("Failed to checkout {commit} of repo at {}", repo_path.display());
-                }
-
-                Crate {
-                    version: commit.clone(),
-                    name: name.clone(),
-                    path: repo_path,
-                    options: options.clone(),
-                }
-            },
-            CrateSource::Path { name, path, options } => {
-                fn is_cache_dir(entry: &DirEntry) -> bool {
-                    fs::read(entry.path().join("CACHEDIR.TAG"))
-                        .map(|x| x.starts_with(b"Signature: 8a477f597d28d172789f06886806bc55"))
-                        .unwrap_or(false)
-                }
-
-                // copy path into the dest_crate_root but skip directories that contain a CACHEDIR.TAG file.
-                // The target/ directory contains a CACHEDIR.TAG file so it is the most commonly skipped directory
-                // as a result of this filter.
-                let dest_crate_root = PathBuf::from(LINTCHECK_SOURCES).join(name);
-                if dest_crate_root.exists() {
-                    println!("Deleting existing directory at {dest_crate_root:?}");
-                    fs::remove_dir_all(&dest_crate_root).unwrap();
-                }
-
-                println!("Copying {path:?} to {dest_crate_root:?}");
-
-                for entry in WalkDir::new(path).into_iter().filter_entry(|e| !is_cache_dir(e)) {
-                    let entry = entry.unwrap();
-                    let entry_path = entry.path();
-                    let relative_entry_path = entry_path.strip_prefix(path).unwrap();
-                    let dest_path = dest_crate_root.join(relative_entry_path);
-                    let metadata = entry_path.symlink_metadata().unwrap();
-
-                    if metadata.is_dir() {
-                        fs::create_dir(dest_path).unwrap();
-                    } else if metadata.is_file() {
-                        fs::copy(entry_path, dest_path).unwrap();
-                    }
-                }
-
-                Crate {
-                    version: String::from("local"),
-                    name: name.clone(),
-                    path: dest_crate_root,
-                    options: options.clone(),
-                }
-            },
-        }
-    }
-}
-
 impl Crate {
     /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
     /// issued
@@ -496,96 +215,6 @@ fn build_clippy() -> String {
     String::from_utf8_lossy(&output.stdout).into_owned()
 }
 
-/// Read a `lintcheck_crates.toml` file
-fn read_crates(toml_path: &Path) -> (Vec<CrateSource>, RecursiveOptions) {
-    let toml_content: String =
-        fs::read_to_string(toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
-    let crate_list: SourceList =
-        toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{e}", toml_path.display()));
-    // parse the hashmap of the toml file into a list of crates
-    let tomlcrates: Vec<TomlCrate> = crate_list.crates.into_values().collect();
-
-    // flatten TomlCrates into CrateSources (one TomlCrates may represent several versions of a crate =>
-    // multiple Cratesources)
-    let mut crate_sources = Vec::new();
-    for tk in tomlcrates {
-        if let Some(ref path) = tk.path {
-            crate_sources.push(CrateSource::Path {
-                name: tk.name.clone(),
-                path: PathBuf::from(path),
-                options: tk.options.clone(),
-            });
-        } else if let Some(ref versions) = tk.versions {
-            // if we have multiple versions, save each one
-            for ver in versions {
-                crate_sources.push(CrateSource::CratesIo {
-                    name: tk.name.clone(),
-                    version: ver.to_string(),
-                    options: tk.options.clone(),
-                });
-            }
-        } else if tk.git_url.is_some() && tk.git_hash.is_some() {
-            // otherwise, we should have a git source
-            crate_sources.push(CrateSource::Git {
-                name: tk.name.clone(),
-                url: tk.git_url.clone().unwrap(),
-                commit: tk.git_hash.clone().unwrap(),
-                options: tk.options.clone(),
-            });
-        } else {
-            panic!("Invalid crate source: {tk:?}");
-        }
-
-        // if we have a version as well as a git data OR only one git data, something is funky
-        if tk.versions.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
-            || tk.git_hash.is_some() != tk.git_url.is_some()
-        {
-            eprintln!("tomlkrate: {tk:?}");
-            assert_eq!(
-                tk.git_hash.is_some(),
-                tk.git_url.is_some(),
-                "Error: Encountered TomlCrate with only one of git_hash and git_url!"
-            );
-            assert!(
-                tk.path.is_none() || (tk.git_hash.is_none() && tk.versions.is_none()),
-                "Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields"
-            );
-            unreachable!("Failed to translate TomlCrate into CrateSource!");
-        }
-    }
-    // sort the crates
-    crate_sources.sort();
-
-    (crate_sources, crate_list.recursive)
-}
-
-/// Generate a short list of occurring lints-types and their count
-fn gather_stats(warnings: &[ClippyWarning]) -> (String, HashMap<&String, usize>) {
-    // count lint type occurrences
-    let mut counter: HashMap<&String, usize> = HashMap::new();
-    warnings
-        .iter()
-        .for_each(|wrn| *counter.entry(&wrn.lint).or_insert(0) += 1);
-
-    // collect into a tupled list for sorting
-    let mut stats: Vec<(&&String, &usize)> = counter.iter().collect();
-    // sort by "000{count} {clippy::lintname}"
-    // to not have a lint with 200 and 2 warnings take the same spot
-    stats.sort_by_key(|(lint, count)| format!("{count:0>4}, {lint}"));
-
-    let mut header = String::from("| lint | count |\n");
-    header.push_str("| -------------------------------------------------- | ----- |\n");
-    let stats_string = stats
-        .iter()
-        .map(|(lint, count)| format!("| {lint:<50} | {count:>4} |\n"))
-        .fold(header, |mut table, line| {
-            table.push_str(&line);
-            table
-        });
-
-    (stats_string, counter)
-}
-
 fn main() {
     // We're being executed as a `RUSTC_WRAPPER` as part of `--recursive`
     if let Ok(addr) = env::var("LINTCHECK_SERVER") {
@@ -738,7 +367,9 @@ fn lintcheck(config: LintcheckConfig) {
     }
 
     let text = match config.format {
-        OutputFormat::Text | OutputFormat::Markdown => output(&warnings, &raw_ices, clippy_ver, &config),
+        OutputFormat::Text | OutputFormat::Markdown => {
+            output::summarize_and_print_changes(&warnings, &raw_ices, clippy_ver, &config)
+        },
         OutputFormat::Json => {
             if !raw_ices.is_empty() {
                 for ice in raw_ices {
@@ -756,135 +387,6 @@ fn lintcheck(config: LintcheckConfig) {
     fs::write(&config.lintcheck_results_path, text).unwrap();
 }
 
-/// Creates the log file output for [`OutputFormat::Text`] and [`OutputFormat::Markdown`]
-fn output(warnings: &[ClippyWarning], ices: &[RustcIce], clippy_ver: String, config: &LintcheckConfig) -> String {
-    // generate some stats
-    let (stats_formatted, new_stats) = gather_stats(warnings);
-    let old_stats = read_stats_from_file(&config.lintcheck_results_path);
-
-    let mut all_msgs: Vec<String> = warnings.iter().map(|warn| warn.to_output(config.format)).collect();
-    all_msgs.sort();
-    all_msgs.push("\n\n### Stats:\n\n".into());
-    all_msgs.push(stats_formatted);
-
-    let mut text = clippy_ver; // clippy version number on top
-    text.push_str("\n### Reports\n\n");
-    if config.format == OutputFormat::Markdown {
-        text.push_str("| file | lint | message |\n");
-        text.push_str("| --- | --- | --- |\n");
-    }
-    write!(text, "{}", all_msgs.join("")).unwrap();
-    text.push_str("\n\n### ICEs:\n");
-    for ice in ices {
-        writeln!(text, "{ice}").unwrap();
-    }
-
-    print_stats(old_stats, new_stats, &config.lint_filter);
-
-    text
-}
-
-/// read the previous stats from the lintcheck-log file
-fn read_stats_from_file(file_path: &Path) -> HashMap<String, usize> {
-    let file_content: String = match fs::read_to_string(file_path).ok() {
-        Some(content) => content,
-        None => {
-            return HashMap::new();
-        },
-    };
-
-    let lines: Vec<String> = file_content.lines().map(ToString::to_string).collect();
-
-    lines
-        .iter()
-        .skip_while(|line| line.as_str() != "### Stats:")
-        // Skipping the table header and the `Stats:` label
-        .skip(4)
-        .take_while(|line| line.starts_with("| "))
-        .filter_map(|line| {
-            let mut spl = line.split('|');
-            // Skip the first `|` symbol
-            spl.next();
-            if let (Some(lint), Some(count)) = (spl.next(), spl.next()) {
-                Some((lint.trim().to_string(), count.trim().parse::<usize>().unwrap()))
-            } else {
-                None
-            }
-        })
-        .collect::<HashMap<String, usize>>()
-}
-
-/// print how lint counts changed between runs
-fn print_stats(old_stats: HashMap<String, usize>, new_stats: HashMap<&String, usize>, lint_filter: &[String]) {
-    let same_in_both_hashmaps = old_stats
-        .iter()
-        .filter(|(old_key, old_val)| new_stats.get::<&String>(old_key) == Some(old_val))
-        .map(|(k, v)| (k.to_string(), *v))
-        .collect::<HashMap<String, usize>>();
-
-    let mut old_stats_deduped = old_stats;
-    let mut new_stats_deduped = new_stats;
-
-    // remove duplicates from both hashmaps
-    for (k, v) in &same_in_both_hashmaps {
-        assert!(old_stats_deduped.remove(k) == Some(*v));
-        assert!(new_stats_deduped.remove(k) == Some(*v));
-    }
-
-    println!("\nStats:");
-
-    // list all new counts (key is in new stats but not in old stats)
-    new_stats_deduped
-        .iter()
-        .filter(|(new_key, _)| !old_stats_deduped.contains_key::<String>(new_key))
-        .for_each(|(new_key, new_value)| {
-            println!("{new_key} 0 => {new_value}");
-        });
-
-    // list all changed counts (key is in both maps but value differs)
-    new_stats_deduped
-        .iter()
-        .filter(|(new_key, _new_val)| old_stats_deduped.contains_key::<String>(new_key))
-        .for_each(|(new_key, new_val)| {
-            let old_val = old_stats_deduped.get::<String>(new_key).unwrap();
-            println!("{new_key} {old_val} => {new_val}");
-        });
-
-    // list all gone counts (key is in old status but not in new stats)
-    old_stats_deduped
-        .iter()
-        .filter(|(old_key, _)| !new_stats_deduped.contains_key::<&String>(old_key))
-        .filter(|(old_key, _)| lint_filter.is_empty() || lint_filter.contains(old_key))
-        .for_each(|(old_key, old_value)| {
-            println!("{old_key} {old_value} => 0");
-        });
-}
-
-/// Create necessary directories to run the lintcheck tool.
-///
-/// # Panics
-///
-/// This function panics if creating one of the dirs fails.
-fn create_dirs(krate_download_dir: &Path, extract_dir: &Path) {
-    fs::create_dir("target/lintcheck/").unwrap_or_else(|err| {
-        assert_eq!(
-            err.kind(),
-            ErrorKind::AlreadyExists,
-            "cannot create lintcheck target dir"
-        );
-    });
-    fs::create_dir(krate_download_dir).unwrap_or_else(|err| {
-        assert_eq!(err.kind(), ErrorKind::AlreadyExists, "cannot create crate download dir");
-    });
-    fs::create_dir(extract_dir).unwrap_or_else(|err| {
-        assert_eq!(
-            err.kind(),
-            ErrorKind::AlreadyExists,
-            "cannot create crate extraction dir"
-        );
-    });
-}
-
 /// Returns the path to the Clippy project directory
 #[must_use]
 fn clippy_project_root() -> &'static Path {
diff --git a/lintcheck/src/output.rs b/lintcheck/src/output.rs
new file mode 100644
index 0000000000000..4bfc554ef9e61
--- /dev/null
+++ b/lintcheck/src/output.rs
@@ -0,0 +1,235 @@
+use cargo_metadata::diagnostic::{Diagnostic, DiagnosticSpan};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::fmt::{self, Write as _};
+use std::fs;
+use std::path::Path;
+use std::process::ExitStatus;
+
+use crate::config::{LintcheckConfig, OutputFormat};
+
+/// A single emitted output from clippy being executed on a crate. It may either be a
+/// `ClippyWarning`, or a `RustcIce` caused by a panic within clippy. A crate may have many
+/// `ClippyWarning`s but a maximum of one `RustcIce` (at which point clippy halts execution).
+#[derive(Debug)]
+pub enum ClippyCheckOutput {
+    ClippyWarning(ClippyWarning),
+    RustcIce(RustcIce),
+}
+
+#[derive(Debug)]
+pub struct RustcIce {
+    pub crate_name: String,
+    pub ice_content: String,
+}
+
+impl fmt::Display for RustcIce {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}:\n{}\n========================================\n",
+            self.crate_name, self.ice_content
+        )
+    }
+}
+
+impl RustcIce {
+    pub fn from_stderr_and_status(crate_name: &str, status: ExitStatus, stderr: &str) -> Option<Self> {
+        if status.code().unwrap_or(0) == 101
+        /* ice exit status */
+        {
+            Some(Self {
+                crate_name: crate_name.to_owned(),
+                ice_content: stderr.to_owned(),
+            })
+        } else {
+            None
+        }
+    }
+}
+
+/// A single warning that clippy issued while checking a `Crate`
+#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct ClippyWarning {
+    pub lint: String,
+    pub diag: Diagnostic,
+}
+
+#[allow(unused)]
+impl ClippyWarning {
+    pub fn new(mut diag: Diagnostic) -> Option<Self> {
+        let lint = diag.code.clone()?.code;
+        if !(lint.contains("clippy") || diag.message.contains("clippy"))
+            || diag.message.contains("could not read cargo metadata")
+        {
+            return None;
+        }
+
+        // --recursive bypasses cargo so we have to strip the rendered output ourselves
+        let rendered = diag.rendered.as_mut().unwrap();
+        *rendered = strip_ansi_escapes::strip_str(&rendered);
+
+        Some(Self { lint, diag })
+    }
+
+    pub fn span(&self) -> &DiagnosticSpan {
+        self.diag.spans.iter().find(|span| span.is_primary).unwrap()
+    }
+
+    pub fn to_output(&self, format: OutputFormat) -> String {
+        let span = self.span();
+        let mut file = span.file_name.clone();
+        let file_with_pos = format!("{file}:{}:{}", span.line_start, span.line_end);
+        match format {
+            OutputFormat::Text => format!("{file_with_pos} {} \"{}\"\n", self.lint, self.diag.message),
+            OutputFormat::Markdown => {
+                if file.starts_with("target") {
+                    file.insert_str(0, "../");
+                }
+
+                let mut output = String::from("| ");
+                write!(output, "[`{file_with_pos}`]({file}#L{})", span.line_start).unwrap();
+                write!(output, r#" | `{:<50}` | "{}" |"#, self.lint, self.diag.message).unwrap();
+                output.push('\n');
+                output
+            },
+            OutputFormat::Json => unreachable!("JSON output is handled via serde"),
+        }
+    }
+}
+
+/// Creates the log file output for [`OutputFormat::Text`] and [`OutputFormat::Markdown`]
+pub fn summarize_and_print_changes(
+    warnings: &[ClippyWarning],
+    ices: &[RustcIce],
+    clippy_ver: String,
+    config: &LintcheckConfig,
+) -> String {
+    // generate some stats
+    let (stats_formatted, new_stats) = gather_stats(warnings);
+    let old_stats = read_stats_from_file(&config.lintcheck_results_path);
+
+    let mut all_msgs: Vec<String> = warnings.iter().map(|warn| warn.to_output(config.format)).collect();
+    all_msgs.sort();
+    all_msgs.push("\n\n### Stats:\n\n".into());
+    all_msgs.push(stats_formatted);
+
+    let mut text = clippy_ver; // clippy version number on top
+    text.push_str("\n### Reports\n\n");
+    if config.format == OutputFormat::Markdown {
+        text.push_str("| file | lint | message |\n");
+        text.push_str("| --- | --- | --- |\n");
+    }
+    write!(text, "{}", all_msgs.join("")).unwrap();
+    text.push_str("\n\n### ICEs:\n");
+    for ice in ices {
+        writeln!(text, "{ice}").unwrap();
+    }
+
+    print_stats(old_stats, new_stats, &config.lint_filter);
+
+    text
+}
+
+/// Generate a short list of occurring lints-types and their count
+fn gather_stats(warnings: &[ClippyWarning]) -> (String, HashMap<&String, usize>) {
+    // count lint type occurrences
+    let mut counter: HashMap<&String, usize> = HashMap::new();
+    warnings
+        .iter()
+        .for_each(|wrn| *counter.entry(&wrn.lint).or_insert(0) += 1);
+
+    // collect into a tupled list for sorting
+    let mut stats: Vec<(&&String, &usize)> = counter.iter().collect();
+    // sort by "000{count} {clippy::lintname}"
+    // to not have a lint with 200 and 2 warnings take the same spot
+    stats.sort_by_key(|(lint, count)| format!("{count:0>4}, {lint}"));
+
+    let mut header = String::from("| lint | count |\n");
+    header.push_str("| -------------------------------------------------- | ----- |\n");
+    let stats_string = stats
+        .iter()
+        .map(|(lint, count)| format!("| {lint:<50} | {count:>4} |\n"))
+        .fold(header, |mut table, line| {
+            table.push_str(&line);
+            table
+        });
+
+    (stats_string, counter)
+}
+
+/// read the previous stats from the lintcheck-log file
+fn read_stats_from_file(file_path: &Path) -> HashMap<String, usize> {
+    let file_content: String = match fs::read_to_string(file_path).ok() {
+        Some(content) => content,
+        None => {
+            return HashMap::new();
+        },
+    };
+
+    let lines: Vec<String> = file_content.lines().map(ToString::to_string).collect();
+
+    lines
+        .iter()
+        .skip_while(|line| line.as_str() != "### Stats:")
+        // Skipping the table header and the `Stats:` label
+        .skip(4)
+        .take_while(|line| line.starts_with("| "))
+        .filter_map(|line| {
+            let mut spl = line.split('|');
+            // Skip the first `|` symbol
+            spl.next();
+            if let (Some(lint), Some(count)) = (spl.next(), spl.next()) {
+                Some((lint.trim().to_string(), count.trim().parse::<usize>().unwrap()))
+            } else {
+                None
+            }
+        })
+        .collect::<HashMap<String, usize>>()
+}
+
+/// print how lint counts changed between runs
+fn print_stats(old_stats: HashMap<String, usize>, new_stats: HashMap<&String, usize>, lint_filter: &[String]) {
+    let same_in_both_hashmaps = old_stats
+        .iter()
+        .filter(|(old_key, old_val)| new_stats.get::<&String>(old_key) == Some(old_val))
+        .map(|(k, v)| (k.to_string(), *v))
+        .collect::<HashMap<String, usize>>();
+
+    let mut old_stats_deduped = old_stats;
+    let mut new_stats_deduped = new_stats;
+
+    // remove duplicates from both hashmaps
+    for (k, v) in &same_in_both_hashmaps {
+        assert!(old_stats_deduped.remove(k) == Some(*v));
+        assert!(new_stats_deduped.remove(k) == Some(*v));
+    }
+
+    println!("\nStats:");
+
+    // list all new counts (key is in new stats but not in old stats)
+    new_stats_deduped
+        .iter()
+        .filter(|(new_key, _)| !old_stats_deduped.contains_key::<String>(new_key))
+        .for_each(|(new_key, new_value)| {
+            println!("{new_key} 0 => {new_value}");
+        });
+
+    // list all changed counts (key is in both maps but value differs)
+    new_stats_deduped
+        .iter()
+        .filter(|(new_key, _new_val)| old_stats_deduped.contains_key::<String>(new_key))
+        .for_each(|(new_key, new_val)| {
+            let old_val = old_stats_deduped.get::<String>(new_key).unwrap();
+            println!("{new_key} {old_val} => {new_val}");
+        });
+
+    // list all gone counts (key is in old status but not in new stats)
+    old_stats_deduped
+        .iter()
+        .filter(|(old_key, _)| !new_stats_deduped.contains_key::<&String>(old_key))
+        .filter(|(old_key, _)| lint_filter.is_empty() || lint_filter.contains(old_key))
+        .for_each(|(old_key, old_value)| {
+            println!("{old_key} {old_value} => 0");
+        });
+}
diff --git a/lintcheck/src/popular_crates.rs b/lintcheck/src/popular_crates.rs
index 880a8bd81f083..ad8fc440c4240 100644
--- a/lintcheck/src/popular_crates.rs
+++ b/lintcheck/src/popular_crates.rs
@@ -44,7 +44,7 @@ pub(crate) fn fetch(output: PathBuf, number: usize) -> Result<(), Box<dyn Error>> {
     let mut out = "[crates]\n".to_string();
 
     for Crate { name, max_version } in crates {
-        writeln!(out, "{name} = {{ name = '{name}', versions = ['{max_version}'] }}").unwrap();
+        writeln!(out, "{name} = {{ name = '{name}', version = '{max_version}' }}").unwrap();
     }
 
     fs::write(output, out)?;
diff --git a/lintcheck/src/recursive.rs b/lintcheck/src/recursive.rs
index 24dddfe65636a..373ca6f991841 100644
--- a/lintcheck/src/recursive.rs
+++ b/lintcheck/src/recursive.rs
@@ -3,7 +3,8 @@
 //! [`LintcheckServer`] to ask if it should be skipped, and if not sends the stderr of running
 //! clippy on the crate to the server
 
-use crate::{ClippyWarning, RecursiveOptions};
+use crate::input::RecursiveOptions;
+use crate::ClippyWarning;
 
 use std::collections::HashSet;
 use std::io::{BufRead, BufReader, Read, Write};