From 14fb5c38809a8a6c2ca378852557434656843001 Mon Sep 17 00:00:00 2001 From: Spenser Black Date: Tue, 12 Sep 2023 20:28:27 -0400 Subject: [PATCH] Drop submodule support (#175) This drops submodule support, and in doing so simplifies the implementation. --- Cargo.lock | 4 +- Cargo.toml | 2 +- gengo-bin/Cargo.toml | 2 +- gengo/src/analysis/mod.rs | 37 +++++++----------- gengo/src/lib.rs | 81 ++++++++------------------------------- 5 files changed, 33 insertions(+), 93 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6351b632..feccede5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -610,7 +610,7 @@ dependencies = [ [[package]] name = "gengo" -version = "0.5.2" +version = "0.6.0" dependencies = [ "criterion", "gix", @@ -628,7 +628,7 @@ dependencies = [ [[package]] name = "gengo-bin" -version = "0.5.2" +version = "0.6.0" dependencies = [ "clap", "gengo", diff --git a/Cargo.toml b/Cargo.toml index c5e377bb..1286d86d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ resolver = "2" [workspace.package] description = "Get the language distribution stats of your repository" -version = "0.5.2" +version = "0.6.0" edition = "2021" repository = "https://github.com/spenserblack/gengo" readme = "README.md" diff --git a/gengo-bin/Cargo.toml b/gengo-bin/Cargo.toml index df2015a0..a4abcd5e 100644 --- a/gengo-bin/Cargo.toml +++ b/gengo-bin/Cargo.toml @@ -19,7 +19,7 @@ color = ["owo-colors", "gengo/owo-colors"] [dependencies] clap = { version = "4", features = ["derive", "wrap_help"] } -gengo = { path = "../gengo", version = "0.5", default-features = false } +gengo = { path = "../gengo", version = "0.6", default-features = false } indexmap = "2" owo-colors = { version = "3", optional = true } diff --git a/gengo/src/analysis/mod.rs b/gengo/src/analysis/mod.rs index a20186f1..a0a51e43 100644 --- a/gengo/src/analysis/mod.rs +++ b/gengo/src/analysis/mod.rs @@ -11,28 +11,20 @@ pub use summary::Summary; mod summary; /// The result of analyzing a repository along with all of its submodules. -pub struct Analysis(pub(super) Vec); +pub struct Analysis(pub(super) crate::Results); impl Analysis { pub fn iter(&self) -> impl Iterator, &Entry)> + '_ { - self.0.iter().flat_map(|results| { - results.entries.iter().filter_map(|entry| { - entry.result.as_ref().and_then(|result| { - Some(( - { - let p = entry.index_entry.path_in(&results.path_storage); - if !results.root.is_empty() { - let mut base = results.root.clone(); - base.push(b'/'); - base.extend_from_slice(p); - gix::path::try_from_bstring(base).ok()?.into() - } else { - gix::path::try_from_bstr(p).ok()? - } - }, - result, - )) - }) + let results = &self.0; + results.entries.iter().filter_map(|entry| { + entry.result.as_ref().and_then(|result| { + Some(( + { + let p = entry.index_entry.path_in(&results.path_storage); + gix::path::try_from_bstr(p).ok()? + }, + result, + )) }) }) } @@ -50,11 +42,8 @@ impl Analysis { /// Summarizes the analysis by language and size. pub fn summary_with(&self, opts: SummaryOpts) -> Summary { let mut summary = IndexMap::new(); - for entry in self - .0 - .iter() - .flat_map(|results| results.entries.iter().filter_map(|e| e.result.as_ref())) - { + let results = &self.0; + for entry in results.entries.iter().filter_map(|e| e.result.as_ref()) { if !(opts.all || entry.detectable()) { continue; } diff --git a/gengo/src/lib.rs b/gengo/src/lib.rs index e48366c3..3e51cae5 100644 --- a/gengo/src/lib.rs +++ b/gengo/src/lib.rs @@ -12,13 +12,13 @@ use documentation::Documentation; pub use error::{Error, ErrorKind}; use generated::Generated; use gix::attrs::StateRef; -use gix::bstr::{BString, ByteSlice}; +use gix::bstr::ByteSlice; use gix::prelude::FindExt; use glob::MatchOptions; pub use languages::analyzer::Analyzers; use languages::Category; pub use languages::Language; -use std::collections::HashMap; + use std::path::Path; use std::sync::atomic::Ordering; use vendored::Vendored; @@ -88,28 +88,16 @@ struct BlobEntry { /// The result of analyzing a repository or a single submodule struct Results { - /// If this is a submodule, the root is not empty and the full path to where our paths start. - root: BString, entries: Vec, path_storage: gix::index::PathStorage, } impl Results { /// Create a data structure that holds index entries as well as our results per entry. - /// Return a list of paths at which submodules can be found, along with their - /// commit ids. - fn from_index( - root: BString, - index: gix::index::State, - ) -> (Self, Vec<(BString, gix::ObjectId)>) { + fn from_index(index: gix::index::State) -> Self { use gix::index::entry::Mode; let (entries, path_storage) = index.into_entries(); - let submodules: Vec<_> = entries - .iter() - .filter(|e| e.mode == Mode::COMMIT) - .map(|e| (e.path_in(&path_storage).to_owned(), e.id)) - .collect(); let entries: Vec<_> = entries .into_iter() .filter(|e| matches!(e.mode, Mode::FILE | Mode::FILE_EXECUTABLE)) @@ -118,14 +106,11 @@ impl Results { result: None, }) .collect(); - ( - Results { - root, - entries, - path_storage, - }, - submodules, - ) + + Results { + entries, + path_storage, + } } } @@ -134,45 +119,13 @@ impl Gengo { pub fn analyze(&self, rev: &str) -> Result { let repo = self.repository.to_thread_local(); let tree_id = repo.rev_parse_single(rev)?.object()?.peel_to_tree()?.id; - let mut stack = vec![(BString::default(), repo, tree_id)]; - - let mut all_results = Vec::new(); - while let Some((root, repo, tree_id)) = stack.pop() { - let is_submodule = !root.is_empty(); - let (state, index) = GitState::new(&repo, &tree_id)?; - let (mut results, submodule_id_by_path) = Results::from_index(root.clone(), index); - - let submodules = repo.submodules()?.map(|sms| { - sms.filter_map(|sm| { - let path = sm.path().ok()?; - let sm_repo = sm.open().ok().flatten()?; - Some((path.into_owned(), sm_repo)) - }) - .collect::>() - }); - self.analyze_index(&repo.into_sync(), &mut results, state, is_submodule)?; - all_results.push(results); - - if let Some(mut submodules_by_path) = submodules { - stack.extend( - submodule_id_by_path - .into_iter() - .filter_map(|(path, sm_commit)| { - let sm_repo = submodules_by_path.remove(&path)?; - let tree_id = - sm_repo.find_object(sm_commit).ok()?.peel_to_tree().ok()?.id; - let mut abs_root = root.clone(); - if !abs_root.is_empty() { - abs_root.push(b'/'); - } - abs_root.extend_from_slice(&path); - Some((abs_root, sm_repo, tree_id)) - }), - ); - } - } - Ok(Analysis(all_results)) + let (state, index) = GitState::new(&repo, &tree_id)?; + let mut results = Results::from_index(index); + + self.analyze_index(&repo.into_sync(), &mut results, state)?; + + Ok(Analysis(results)) } fn analyze_index( @@ -180,7 +133,6 @@ impl Gengo { repo: &gix::ThreadSafeRepository, results: &mut Results, state: GitState, - is_submodule: bool, ) -> Result<()> { gix::parallel::in_parallel_with_slice( &mut results.entries, @@ -195,7 +147,7 @@ impl Gengo { else { return Ok(()); }; - self.analyze_blob(path, repo, state, entry, is_submodule) + self.analyze_blob(path, repo, state, entry) }, || Some(std::time::Duration::from_micros(5)), std::convert::identity, @@ -209,7 +161,6 @@ impl Gengo { repo: &gix::Repository, state: &mut GitState, result: &mut BlobEntry, - is_submodule: bool, ) -> Result<()> { let filepath = filepath.as_ref(); let blob = repo.find_object(result.index_entry.id)?; @@ -261,7 +212,7 @@ impl Gengo { let vendored = attrs[3] .as_ref() .map(|info| info.assignment.state.is_set()) - .unwrap_or_else(|| is_submodule || self.is_vendored(filepath, contents)); + .unwrap_or_else(|| self.is_vendored(filepath, contents)); let detectable = match language.category() { Category::Data | Category::Prose => false,