Skip to content

Commit c87f2cc

Browse files
committed
Merge branch 'gix-status'
2 parents c372321 + a28bf90 commit c87f2cc

File tree

26 files changed

+1157
-965
lines changed

26 files changed

+1157
-965
lines changed

Diff for: Cargo.lock

+8-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: crate-status.md

+142-193
Large diffs are not rendered by default.

Diff for: gitoxide-core/src/query/engine/update.rs

+4-5
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use anyhow::{anyhow, bail};
99
use gix::objs::find::Error;
1010
use gix::{
1111
bstr::{BStr, BString, ByteSlice},
12+
diff::rewrites::CopySource,
1213
features::progress,
13-
object::tree::diff::rewrites::CopySource,
1414
parallel::{InOrderIter, SequenceId},
1515
prelude::ObjectIdExt,
1616
Count, Progress,
@@ -139,11 +139,10 @@ pub fn update(
139139
});
140140

141141
let rewrites = {
142-
let mut r =
143-
gix::object::tree::diff::Rewrites::try_from_config(&repo.config_snapshot(), true)?.unwrap_or_default();
144-
r.copies = Some(gix::object::tree::diff::rewrites::Copies {
142+
let mut r = gix::diff::new_rewrites(&repo.config_snapshot(), true)?.unwrap_or_default();
143+
r.copies = Some(gix::diff::rewrites::Copies {
145144
source: if find_copies_harder {
146-
CopySource::FromSetOfModifiedFilesAndSourceTree
145+
CopySource::FromSetOfModifiedFilesAndAllSources
147146
} else {
148147
CopySource::FromSetOfModifiedFiles
149148
},

Diff for: gix-diff/Cargo.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ autotests = false
1212

1313
[features]
1414
default = ["blob"]
15-
## Enable diffing of blobs using imara-diff.
15+
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
1616
blob = ["dep:imara-diff"]
1717
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
1818
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
@@ -25,10 +25,12 @@ doctest = false
2525
[dependencies]
2626
gix-hash = { version = "^0.13.1", path = "../gix-hash" }
2727
gix-object = { version = "^0.38.0", path = "../gix-object" }
28+
2829
thiserror = "1.0.32"
2930
imara-diff = { version = "0.1.3", optional = true }
3031
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]}
3132
getrandom = { version = "0.2.8", optional = true, default-features = false, features = ["js"] }
33+
bstr = { version = "1.5.0", default-features = false }
3234

3335
document-features = { version = "0.2.0", optional = true }
3436

Diff for: gix-diff/src/blob.rs

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
11
//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff),
22
//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe).
3+
//!
4+
//!
5+
/// Information about the diff performed to detect similarity.
6+
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
7+
pub struct DiffLineStats {
8+
/// The amount of lines to remove from the source to get to the destination.
9+
pub removals: u32,
10+
/// The amount of lines to add to the source to get to the destination.
11+
pub insertions: u32,
12+
/// The amount of lines of the previous state, in the source.
13+
pub before: u32,
14+
/// The amount of lines of the new state, in the destination.
15+
pub after: u32,
16+
}
17+
318
pub use imara_diff::*;

Diff for: gix-diff/src/lib.rs

+28
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,34 @@ cfg_attr(doc, doc = ::document_features::document_features!())
88
#![deny(missing_docs, rust_2018_idioms)]
99
#![forbid(unsafe_code)]
1010

11+
/// A structure to capture how to perform rename and copy tracking, used by the [rewrites::Tracker].
12+
#[derive(Debug, Copy, Clone, PartialEq)]
13+
#[cfg(feature = "blob")]
14+
pub struct Rewrites {
15+
/// If `Some(…)`, also find copies. `None` is the default which does not try to detect copies at all.
16+
///
17+
/// Note that this is an even more expensive operation than detecting renames stemming from additions and deletions
18+
/// as the resulting set to search through is usually larger.
19+
pub copies: Option<rewrites::Copies>,
20+
/// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`.
21+
/// This field is similar to `git diff -M50%`.
22+
///
23+
/// If `None`, files are only considered equal if their content matches 100%.
24+
/// Note that values greater than 1.0 have no different effect than 1.0.
25+
pub percentage: Option<f32>,
26+
/// The amount of files to consider for fuzzy rename or copy tracking. Defaults to 1000, meaning that only 1000*1000
27+
/// combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity.
28+
/// If 0, there is no limit.
29+
///
30+
/// If the limit is too low to test the entire set of combinations, the algorithm sacrifices precision and does not
31+
/// run the fuzzy version of identity tests at all. That way results are never partial.
32+
pub limit: usize,
33+
}
34+
35+
/// Contains a [Tracker](rewrites::Tracker) to detect rewrites.
36+
#[cfg(feature = "blob")]
37+
pub mod rewrites;
38+
1139
///
1240
pub mod tree;
1341

Diff for: gix-diff/src/rewrites/mod.rs

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
use crate::Rewrites;
2+
3+
/// Types related to the rename tracker for renames, rewrites and copies.
4+
pub mod tracker;
5+
6+
/// A type that retains the state of an ongoing tracking operation: it collects the set of interesting changes,
7+
/// some of which are kept so that, at a later stage, those that seem to be renames or copies can be computed.
8+
pub struct Tracker<T> {
9+
/// The tracked items thus far, which will be used to determine renames/copies and rewrites later.
10+
items: Vec<tracker::Item<T>>,
11+
/// A place to store all paths in, to reduce the number of allocations.
12+
path_backing: Vec<u8>,
13+
/// A buffer for use when fetching objects for similarity tests.
14+
buf1: Vec<u8>,
15+
/// Another buffer for use when fetching objects for similarity tests.
16+
buf2: Vec<u8>,
17+
/// How to track copies and/or rewrites.
18+
rewrites: Rewrites,
19+
/// The diff algorithm to use when checking for similarity.
20+
diff_algo: crate::blob::Algorithm,
21+
}
22+
23+
/// Determine in which set of files to search for copies.
24+
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
25+
pub enum CopySource {
26+
/// Find copies from the set of modified files only.
27+
#[default]
28+
FromSetOfModifiedFiles,
29+
/// Find copies from the set of modified files, as well as all files known to the source (i.e. previous state of the tree).
30+
///
31+
/// This can be an expensive operation as the number of comparisons grows with the product of candidate sources and destinations.
32+
FromSetOfModifiedFilesAndAllSources,
33+
}
34+
35+
/// Under which circumstances we consider a file to be a copy.
36+
#[derive(Debug, Copy, Clone, PartialEq)]
37+
pub struct Copies {
38+
/// The set of files to search when finding the source of copies.
39+
pub source: CopySource,
40+
/// Equivalent to [`Rewrites::percentage`], but used for copy tracking.
41+
///
42+
/// Useful to have similarity-based rename tracking and cheaper copy tracking.
43+
pub percentage: Option<f32>,
44+
}
45+
46+
impl Default for Copies {
47+
fn default() -> Self {
48+
Copies {
49+
source: CopySource::default(),
50+
percentage: Some(0.5),
51+
}
52+
}
53+
}
54+
55+
/// Information collected while handling rewrites of files which may be tracked.
56+
#[derive(Default, Clone, Copy, Debug, PartialEq)]
57+
pub struct Outcome {
58+
/// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration.
59+
pub options: Rewrites,
60+
/// The amount of similarity checks that have been conducted to find renamed files and potentially copies.
61+
pub num_similarity_checks: usize,
62+
/// Set to the amount of worst-case rename permutations we didn't search as our limit didn't allow it.
63+
pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize,
64+
/// Set to the amount of worst-case copy permutations we didn't search as our limit didn't allow it.
65+
pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize,
66+
}
67+
68+
/// The default settings for rewrites according to the git configuration defaults.
69+
impl Default for Rewrites {
70+
fn default() -> Self {
71+
Rewrites {
72+
copies: None,
73+
percentage: Some(0.5),
74+
limit: 1000,
75+
}
76+
}
77+
}

0 commit comments

Comments
 (0)