Skip to content

Commit

Permalink
feat!: generalize rename-tracking engine for later use with status.
Browse files Browse the repository at this point in the history
Previously the rename-tracking engine was integrated with tree-diffs,
even though it already operated in a stand-alone fashion.
Now it's officially generalized, which allows it to be tested separately
and to be used when tracking renames for diffs between index and tree,
index and index, and index and worktree.
  • Loading branch information
Byron committed Nov 5, 2023
1 parent 7e437ad commit 7ce9173
Show file tree
Hide file tree
Showing 9 changed files with 849 additions and 669 deletions.
8 changes: 2 additions & 6 deletions gix/src/config/cache/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,9 @@ impl Cache {
}

#[cfg(feature = "blob-diff")]
pub(crate) fn diff_renames(
&self,
) -> Result<Option<crate::object::tree::diff::Rewrites>, crate::object::tree::diff::rewrites::Error> {
pub(crate) fn diff_renames(&self) -> Result<Option<crate::diff::Rewrites>, crate::diff::rewrites::Error> {
self.diff_renames
.get_or_try_init(|| {
crate::object::tree::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config)
})
.get_or_try_init(|| crate::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config))
.copied()
}

Expand Down
2 changes: 1 addition & 1 deletion gix/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ pub(crate) struct Cache {
pub(crate) url_rewrite: OnceCell<crate::remote::url::Rewrite>,
/// The lazy-loaded rename information for diffs.
#[cfg(feature = "blob-diff")]
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Rewrites>>,
pub(crate) diff_renames: OnceCell<Option<crate::diff::Rewrites>>,
/// A lazily loaded mapping to know which url schemes to allow
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
pub(crate) url_scheme: OnceCell<crate::remote::url::SchemePermission>,
Expand Down
756 changes: 756 additions & 0 deletions gix/src/diff.rs

Large diffs are not rendered by default.

14 changes: 1 addition & 13 deletions gix/src/object/tree/diff/change.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
use crate::diff::blob::DiffLineStats;
use crate::{bstr::BStr, Id};

/// Line-based statistics about the diff performed to detect similarity of a [Rewrite][Event::Rewrite].
///
/// All counts are in lines and are stored as `u32`.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct DiffLineStats {
/// The number of lines to remove from the source to get to the destination.
pub removals: u32,
/// The number of lines to add to the source to get to the destination.
pub insertions: u32,
/// The number of lines of the previous state, i.e. in the source.
pub before: u32,
/// The number of lines of the new state, i.e. in the destination.
pub after: u32,
}

/// An event emitted when finding differences between two trees.
#[derive(Debug, Clone, Copy)]
pub enum Event<'a, 'old, 'new> {
Expand Down
100 changes: 82 additions & 18 deletions gix/src/object/tree/diff/for_each.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
use gix_object::TreeRefIter;

use super::{change, Action, Change, Platform};
use crate::{
bstr::BStr,
ext::ObjectIdExt,
object::tree::{
diff,
diff::{rewrites, tracked},
},
Repository, Tree,
};
use crate::diff::rewrites::tracker;
use crate::{bstr::BStr, diff::rewrites, ext::ObjectIdExt, object::tree::diff, Repository, Tree};

/// The error returned by methods on the [diff platform][Platform].
#[derive(Debug, thiserror::Error)]
Expand All @@ -19,12 +12,10 @@ pub enum Error {
Diff(#[from] gix_diff::tree::changes::Error),
#[error("The user-provided callback failed")]
ForEach(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("Could not find blob for similarity checking")]
FindExistingBlob(#[from] crate::object::find::existing::Error),
#[error("Could not configure diff algorithm prior to checking similarity")]
ConfigureDiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
#[error("Could not traverse tree to obtain possible sources for copies")]
TraverseTreeForExhaustiveCopyDetection(#[from] gix_traverse::tree::breadthfirst::Error),
#[error("Failure during rename tracking")]
RenameTracking(#[from] tracker::emit::Error),
}

///
Expand All @@ -49,12 +40,14 @@ impl<'a, 'old> Platform<'a, 'old> {
E: std::error::Error + Sync + Send + 'static,
{
let repo = self.lhs.repo;
let diff_algo = repo.config.diff_algorithm()?;
let mut delegate = Delegate {
src_tree: self.lhs,
other_repo: other.repo,
recorder: gix_diff::tree::Recorder::default().track_location(self.tracking),
visit: for_each,
tracked: self.rewrites.map(|r| tracked::State::new(r, self.tracking)),
location: self.tracking,
tracked: self.rewrites.map(|r| rewrites::Tracker::new(r, diff_algo)),
err: None,
};
match gix_diff::tree::Changes::from(TreeRefIter::from_bytes(&self.lhs.data)).needed_to_obtain(
Expand Down Expand Up @@ -87,7 +80,8 @@ struct Delegate<'a, 'old, 'new, VisitFn, E> {
other_repo: &'new Repository,
recorder: gix_diff::tree::Recorder,
visit: VisitFn,
tracked: Option<tracked::State>,
tracked: Option<rewrites::Tracker>,
location: Option<gix_diff::tree::recorder::Location>,
err: Option<E>,
}

Expand Down Expand Up @@ -157,8 +151,8 @@ where
id: oid.to_owned().attach(self.other_repo),
diff: source.diff,
copy: match source.kind {
tracked::visit::Kind::RenameTarget => false,
tracked::visit::Kind::CopyDestination => true,
tracker::visit::Kind::RenameTarget => false,
tracker::visit::Kind::CopyDestination => true,
},
},
};
Expand All @@ -180,7 +174,12 @@ where
&mut self.err,
),
},
self.src_tree,
|oid, buf| self.src_tree.repo.objects.find_blob(oid, buf),
|push| {
self.src_tree
.traverse()
.breadthfirst(&mut tree_to_changes::Delegate::new(push, self.location))
},
)?;
Ok(Some(outcome))
}
Expand Down Expand Up @@ -233,3 +232,68 @@ where
}
}
}

/// Adapts a tree traversal so that every blob it encounters is reported as a
/// [`Change`](gix_diff::tree::visit::Change) to a caller-provided `push` callback.
mod tree_to_changes {
    use gix_diff::tree::{recorder::Location as DiffLocation, visit::Change};
    use gix_object::tree::EntryRef;
    use gix_traverse::tree::{recorder::Location as TraverseLocation, visit::Action, Recorder, Visit};

    use crate::bstr::BStr;

    /// A traversal visitor that forwards all path bookkeeping to an inner
    /// traversal [`Recorder`] and hands each blob entry to `push`, along with
    /// the path the recorder currently tracks.
    pub struct Delegate<'a> {
        /// Receives one synthesized change per visited blob, together with its recorded path.
        push: &'a mut dyn FnMut(Change, &BStr),
        /// Used only for tracking the current path during traversal.
        recorder: Recorder,
    }

    impl<'a> Delegate<'a> {
        /// Create a delegate that sends blob entries to `push`.
        ///
        /// `location` is the diff-side location tracking mode; it is translated
        /// into the equally named traversal-side mode, and `None` stays `None`.
        pub fn new(push: &'a mut dyn FnMut(Change, &BStr), location: Option<DiffLocation>) -> Self {
            let traverse_location = match location {
                Some(DiffLocation::FileName) => Some(TraverseLocation::FileName),
                Some(DiffLocation::Path) => Some(TraverseLocation::Path),
                None => None,
            };
            Delegate {
                push,
                recorder: Recorder::default().track_location(traverse_location),
            }
        }
    }

    impl Visit for Delegate<'_> {
        fn pop_front_tracked_path_and_set_current(&mut self) {
            self.recorder.pop_front_tracked_path_and_set_current()
        }

        fn push_back_tracked_path_component(&mut self, component: &BStr) {
            self.recorder.push_back_tracked_path_component(component)
        }

        fn push_path_component(&mut self, component: &BStr) {
            self.recorder.push_path_component(component)
        }

        fn pop_path_component(&mut self) {
            self.recorder.pop_path_component();
        }

        /// Trees themselves are never reported; traversal simply continues.
        fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> Action {
            Action::Continue
        }

        /// Report blobs to `push`; any other non-tree entry is skipped.
        fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
            if !entry.mode.is_blob() {
                return Action::Continue;
            }

            // The entry is presented as a modification whose previous object id is the
            // null id of the same hash kind, while mode and id are taken from the entry itself.
            let change = Change::Modification {
                previous_entry_mode: entry.mode,
                previous_oid: gix_hash::ObjectId::null(entry.oid.kind()),
                entry_mode: entry.mode,
                oid: entry.oid.to_owned(),
            };
            (self.push)(change, self.recorder.path());
            Action::Continue
        }
    }
}
31 changes: 2 additions & 29 deletions gix/src/object/tree/diff/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use gix_diff::tree::recorder::Location;

use crate::diff::Rewrites;
use crate::{bstr::BStr, Tree};

/// Returned by the `for_each` function to control flow.
Expand Down Expand Up @@ -39,7 +40,7 @@ impl<'repo> Tree<'repo> {
/// try to access blobs to compute a similarity metric. Thus, it's more compatible to turn rewrite tracking off
/// using [`Platform::track_rewrites()`].
#[allow(clippy::result_large_err)]
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, rewrites::Error> {
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, crate::diff::rewrites::Error> {
Ok(Platform {
state: Default::default(),
lhs: self,
Expand All @@ -58,34 +59,6 @@ pub struct Platform<'a, 'repo> {
rewrites: Option<Rewrites>,
}

/// A structure to capture how to perform rename and copy tracking.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Rewrites {
/// If `Some(…)`, also find copies. `None` is the default, which does not try to detect copies at all.
///
/// Note that this is an even more expensive operation than detecting renames alone.
pub copies: Option<rewrites::Copies>,
/// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`.
/// This field is similar to `git diff -M50%`.
///
/// If `None`, files are only considered equal if their content matches 100%.
/// Note that values greater than 1.0 have no different effect than 1.0.
pub percentage: Option<f32>,
/// The number of files to consider for fuzzy rename or copy tracking. Defaults to 1000, meaning that only 1000*1000
/// combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity.
/// If 0, there is no limit.
///
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade away precision and not
/// run the fuzzy version of identity tests at all. That way results are never partial.
pub limit: usize,
}

///
pub mod rewrites;

/// types to actually perform rename tracking.
pub(crate) mod tracked;

/// Configuration
impl<'a, 'repo> Platform<'a, 'repo> {
/// Keep track of file-names, which makes the [`location`][Change::location] field usable with the filename of the changed item.
Expand Down
108 changes: 0 additions & 108 deletions gix/src/object/tree/diff/rewrites.rs

This file was deleted.

Loading

0 comments on commit 7ce9173

Please sign in to comment.