Skip to content

Commit

Permalink
Add a cut-off criterion which enforces a minimum number of merges when u…
Browse files Browse the repository at this point in the history
…sing match merging. Below this threshold, merged matches are discarded.
  • Loading branch information
tsaglam committed Feb 14, 2025
1 parent 2f45711 commit 20b86ec
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion core/src/main/java/de/jplag/merging/MatchMerging.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
*/
public class MatchMerging {
private final JPlagOptions options;
private final static int MINIMUM_REQUIRED_MERGES = 3;
private int numberOfMerges;

/**
* Instantiates the match merging algorithm for a comparison result and a set of specific options.
Expand All @@ -47,13 +49,18 @@ public JPlagResult mergeMatchesOf(JPlagResult result) {
List<JPlagComparison> comparisonsMerged = new ArrayList<>();

ProgressBarLogger.iterate(ProgressBarType.MATCH_MERGING, comparisons, comparison -> {
numberOfMerges = 0;
Submission leftSubmission = comparison.firstSubmission().copy();
Submission rightSubmission = comparison.secondSubmission().copy();
List<Match> globalMatches = new ArrayList<>(comparison.matches());
globalMatches.addAll(comparison.ignoredMatches());
globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission);
globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList();
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
if (numberOfMerges >= MINIMUM_REQUIRED_MERGES) {
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
} else {
comparisonsMerged.add(comparison);
}
});

long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
Expand Down Expand Up @@ -111,6 +118,7 @@ private List<Match> mergeNeighbors(List<Match> globalMatches, Submission leftSub
globalMatches = removeToken(globalMatches, leftSubmission, rightSubmission, upperNeighbor, tokenBetweenLeft, tokensBetweenRight);
neighbors = computeNeighbors(globalMatches);
i = 0;
numberOfMerges++;
} else {
i++;
}
Expand Down

0 comments on commit 20b86ec

Please sign in to comment.