Skip to content

Commit

Permalink
Merge branch 'gix-status'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Nov 11, 2023
2 parents c372321 + a28bf90 commit c87f2cc
Show file tree
Hide file tree
Showing 26 changed files with 1,157 additions and 965 deletions.
14 changes: 8 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

335 changes: 142 additions & 193 deletions crate-status.md

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions gitoxide-core/src/query/engine/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use anyhow::{anyhow, bail};
use gix::objs::find::Error;
use gix::{
bstr::{BStr, BString, ByteSlice},
diff::rewrites::CopySource,
features::progress,
object::tree::diff::rewrites::CopySource,
parallel::{InOrderIter, SequenceId},
prelude::ObjectIdExt,
Count, Progress,
Expand Down Expand Up @@ -139,11 +139,10 @@ pub fn update(
});

let rewrites = {
let mut r =
gix::object::tree::diff::Rewrites::try_from_config(&repo.config_snapshot(), true)?.unwrap_or_default();
r.copies = Some(gix::object::tree::diff::rewrites::Copies {
let mut r = gix::diff::new_rewrites(&repo.config_snapshot(), true)?.unwrap_or_default();
r.copies = Some(gix::diff::rewrites::Copies {
source: if find_copies_harder {
CopySource::FromSetOfModifiedFilesAndSourceTree
CopySource::FromSetOfModifiedFilesAndAllSources
} else {
CopySource::FromSetOfModifiedFiles
},
Expand Down
4 changes: 3 additions & 1 deletion gix-diff/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ autotests = false

[features]
default = ["blob"]
## Enable diffing of blobs using imara-diff.
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
blob = ["dep:imara-diff"]
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
Expand All @@ -25,10 +25,12 @@ doctest = false
[dependencies]
gix-hash = { version = "^0.13.1", path = "../gix-hash" }
gix-object = { version = "^0.38.0", path = "../gix-object" }

thiserror = "1.0.32"
imara-diff = { version = "0.1.3", optional = true }
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]}
getrandom = { version = "0.2.8", optional = true, default-features = false, features = ["js"] }
bstr = { version = "1.5.0", default-features = false }

document-features = { version = "0.2.0", optional = true }

Expand Down
15 changes: 15 additions & 0 deletions gix-diff/src/blob.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff),
//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe).
//!
//!
/// Line-level statistics of a diff that was run to determine how similar two states are.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct DiffLineStats {
    /// How many lines have to be removed from the source to arrive at the destination.
    pub removals: u32,
    /// How many lines have to be added to the source to arrive at the destination.
    pub insertions: u32,
    /// The line count of the previous state, i.e. the source.
    pub before: u32,
    /// The line count of the new state, i.e. the destination.
    pub after: u32,
}

pub use imara_diff::*;
28 changes: 28 additions & 0 deletions gix-diff/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,34 @@ cfg_attr(doc, doc = ::document_features::document_features!())
#![deny(missing_docs, rust_2018_idioms)]
#![forbid(unsafe_code)]

/// Options that determine how rename and copy tracking is performed, as consumed by the [rewrites::Tracker].
#[cfg(feature = "blob")]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Rewrites {
    /// Copies are searched for as well if this is `Some(…)`. With `None`, which is the default, copy detection
    /// is not attempted at all.
    ///
    /// Note that finding copies is even more expensive than detecting renames from additions and deletions,
    /// because the resulting set to search through is usually larger.
    pub copies: Option<rewrites::Copies>,
    /// The similarity, as percentage, that files must reach to be considered renamed. The default is `Some(0.5)`.
    /// This field is similar to `git diff -M50%`.
    ///
    /// With `None`, files are only considered equal if their content matches 100%.
    /// Note that values greater than 1.0 have no different effect than 1.0.
    pub percentage: Option<f32>,
    /// The amount of files to consider for fuzzy rename or copy tracking. The default is 1000, which means that
    /// only 1000*1000 combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by
    /// comparing similarity.
    /// If 0, there is no limit.
    ///
    /// If the limit is too low to test the entire set of combinations, the algorithm trades in precision and does
    /// not run the fuzzy version of identity tests at all. That way results are never partial.
    pub limit: usize,
}

/// Contains a [Tracker](rewrites::Tracker) to detect rewrites.
#[cfg(feature = "blob")]
pub mod rewrites;

///
pub mod tree;

Expand Down
77 changes: 77 additions & 0 deletions gix-diff/src/rewrites/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use crate::Rewrites;

/// Types related to the rename tracker for renames, rewrites and copies.
pub mod tracker;

/// State for an ongoing rewrite-tracking operation.
///
/// It retains the changes of interest as they are encountered, so that at a later stage
/// the ones that seem to be renames or copies can be computed from them.
pub struct Tracker<T> {
/// The tracked items thus far, which will be used to determine renames/copies and rewrites later.
items: Vec<tracker::Item<T>>,
/// A place to store all paths in, to reduce the amount of allocations.
path_backing: Vec<u8>,
/// A buffer for use when fetching objects for similarity tests.
buf1: Vec<u8>,
/// Another buffer for use when fetching objects for similarity tests.
buf2: Vec<u8>,
/// How to track copies and/or rewrites.
rewrites: Rewrites,
/// The diff algorithm to use when checking for similarity.
diff_algo: crate::blob::Algorithm,
}

/// Selects the set of files that is searched when looking for the source of a copy.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CopySource {
    /// Only the set of modified files is searched for copies. This is the default.
    #[default]
    FromSetOfModifiedFiles,
    /// Besides the set of modified files, all files known to the source (i.e. the previous state of the tree)
    /// are searched as well.
    ///
    /// This can be an expensive operation, as the amount of candidate combinations grows quickly with the
    /// total amount of files in the set.
    FromSetOfModifiedFilesAndAllSources,
}

/// Describes the conditions under which a file is regarded as a copy.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Copies {
    /// Which set of files to look through when searching for the source of a copy.
    pub source: CopySource,
    /// The counterpart of [`Rewrites::percentage`] that applies to copy tracking.
    ///
    /// Keeping it separate makes it possible to combine similarity-based rename tracking
    /// with cheaper copy tracking.
    pub percentage: Option<f32>,
}

impl Default for Copies {
    /// Search the default [`CopySource`] and require a similarity of `Some(0.5)`.
    fn default() -> Self {
        let source = CopySource::default();
        let percentage = Some(0.5);
        Self { source, percentage }
    }
}

/// Statistics and settings gathered while handling rewrites of files which may be tracked.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Outcome {
    /// The options that guided the rewrite tracking, either fully provided by the caller or retrieved
    /// from git configuration.
    pub options: Rewrites,
    /// How many similarity checks were conducted to find renamed files and potentially copies.
    pub num_similarity_checks: usize,
    /// The amount of worst-case rename permutations that were not searched because the limit did not allow it.
    pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize,
    /// The amount of worst-case copy permutations that were not searched because the limit did not allow it.
    pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize,
}

/// The default settings for rewrites according to the git configuration defaults.
impl Default for Rewrites {
fn default() -> Self {
Rewrites {
copies: None,
percentage: Some(0.5),
limit: 1000,
}
}
}
Loading

0 comments on commit c87f2cc

Please sign in to comment.