Skip to content

Commit

Permalink
refactor: FindMatches
Browse files Browse the repository at this point in the history
- Make internal methods private
- Update javadoc contents
- Make tokenize*Stem methods package-private
- style: remove final from arguments
- invert haveChanceToAdd logic into noChanceToAdd because every call site negated the result
- Optimize an indexed for-loop

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
  • Loading branch information
miurahr committed Nov 4, 2024
1 parent ec5af02 commit 1e3b3ce
Showing 1 changed file with 41 additions and 41 deletions.
82 changes: 41 additions & 41 deletions src/org/omegat/core/statistics/FindMatches.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@
public class FindMatches {

/**
* According to gettext source code, PO fuzzies are created above 60%
* https://sourceforge.net/p/omegat/feature-requests/1258/
* According to gettext source code, PO fuzzy entries are created above 60%
* <a href="https://sourceforge.net/p/omegat/feature-requests/1258/">RFE#1258</a>
*/
static final int PENALTY_FOR_FUZZY = 40;
private static final int PENALTY_FOR_REMOVED = 5;
Expand Down Expand Up @@ -273,19 +273,20 @@ public void iterate(EntryKey source, TMXEntry trans) {
if (separateSegmentMatcher != null) {
// split paragraph even when segmentation disabled, then find
// matches for every segment
List<StringBuilder> spaces = new ArrayList<StringBuilder>();
List<Rule> brules = new ArrayList<Rule>();
List<StringBuilder> spaces = new ArrayList<>();
List<Rule> brules = new ArrayList<>();
Language sourceLang = project.getProjectProperties().getSourceLanguage();
Language targetLang = project.getProjectProperties().getTargetLanguage();
List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
if (segments.size() > 1) {
List<String> fsrc = new ArrayList<String>(segments.size());
List<String> ftrans = new ArrayList<String>(segments.size());
int size = segments.size();
List<String> fsrc = new ArrayList<>(size);
List<String> ftrans = new ArrayList<>(size);
// multiple segments
for (short i = 0; i < segments.size(); i++) {
for (short i = 0; i < size; i++) {
String onesrc = segments.get(i);

// find match for separate segment
// find match for a separate segment
List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation,
false, stop);
if (!segmentMatch.isEmpty()
Expand All @@ -307,7 +308,7 @@ public void iterate(EntryKey source, TMXEntry trans) {
}

if (fillSimilarityData) {
// fill similarity data only for result
// fill similarity data only for a result
for (NearString near : result) {
// fix for bug 1586397
byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll,
Expand All @@ -320,7 +321,7 @@ public void iterate(EntryKey source, TMXEntry trans) {
}

/**
* Compare one entry with original entry.
* Compare one entry with the original entry.
*
* @param key
* entry to compare
Expand All @@ -347,9 +348,9 @@ public void iterate(EntryKey source, TMXEntry trans) {
* @param props
* TMX properties
*/
protected void processEntry(EntryKey key, String source, String translation,
NearString.MATCH_SOURCE comesFrom, boolean fuzzy, int penalty, String tmxName, String creator,
long creationDate, String changer, long changedDate, List<TMXProp> props) {
private void processEntry(EntryKey key, String source, String translation, NearString.MATCH_SOURCE comesFrom,
boolean fuzzy, int penalty, String tmxName, String creator, long creationDate,
String changer, long changedDate, List<TMXProp> props) {
// remove part that is to be removed prior to tokenize
String realSource = source;
int realPenaltyForRemoved = 0;
Expand Down Expand Up @@ -380,8 +381,8 @@ protected void processEntry(EntryKey key, String source, String translation,
}
similarityStem -= realPenaltyForRemoved;

// check if we have chance by first percentage only
if (!haveChanceToAdd(similarityStem, Integer.MAX_VALUE, Integer.MAX_VALUE)) {
// check if we have a chance by first percentage only
if (noChanceToAdd(similarityStem, Integer.MAX_VALUE, Integer.MAX_VALUE)) {
return;
}

Expand All @@ -395,8 +396,8 @@ protected void processEntry(EntryKey key, String source, String translation,
}
similarityNoStem -= realPenaltyForRemoved;

// check if we have chance by first and second percentages
if (!haveChanceToAdd(similarityStem, similarityNoStem, Integer.MAX_VALUE)) {
// check if we have a chance by first and second percentages
if (noChanceToAdd(similarityStem, similarityNoStem, Integer.MAX_VALUE)) {
return;
}

Expand All @@ -411,7 +412,7 @@ protected void processEntry(EntryKey key, String source, String translation,
simAdjusted -= realPenaltyForRemoved;

// check if we have a chance by first, second and third percentages
if (!haveChanceToAdd(similarityStem, similarityNoStem, simAdjusted)) {
if (noChanceToAdd(similarityStem, similarityNoStem, simAdjusted)) {
return;
}

Expand All @@ -422,24 +423,24 @@ protected void processEntry(EntryKey key, String source, String translation,
}

addNearString(key, source, translation, comesFrom, fuzzy, similarityStem, similarityNoStem,
simAdjusted, null, tmxName, creator, creationDate, changer, changedDate, props);
simAdjusted, tmxName, creator, creationDate, changer, changedDate, props);
}

/**
* Check if entry have a chance to be added to result list. If no, there is
* no sense to calculate other parameters.
* Check whether an entry has no chance of being added to the result list.
* If true, there is no point in calculating the other parameters.
*
* @param simStem
* similarity with stemming
* @param simNoStem
* similarity without stemming
* @param simExactly
* exact (adjusted) similarity
* @return true if we have chance
* @return true if we have no chance.
*/
protected boolean haveChanceToAdd(final int simStem, final int simNoStem, final int simExactly) {
private boolean noChanceToAdd(int simStem, int simNoStem, int simExactly) {
if (result.size() < maxCount) {
return true;
return false;
}
NearString st = result.get(result.size() - 1);
int chance = Integer.compare(st.scores[0].score, simStem);
Expand All @@ -449,18 +450,17 @@ protected boolean haveChanceToAdd(final int simStem, final int simNoStem, final
if (chance == 0) {
chance = Integer.compare(st.scores[0].adjustedScore, simExactly);
}
return chance != 1;
return chance == 1;
}

/**
* Add near string into result list. Near strings sorted by
* "similarity,simAdjusted"
* Add a near string to the result list. Near strings are sorted by
* "similarity, simAdjusted"
*/
protected void addNearString(final EntryKey key, final String source, final String translation,
NearString.MATCH_SOURCE comesFrom, final boolean fuzzy, final int similarity,
final int similarityNoStem, final int simAdjusted, final byte[] similarityData,
final String tmxName, final String creator, final long creationDate, final String changer,
final long changedDate, final List<TMXProp> tuProperties) {
private void addNearString(EntryKey key, String source, String translation, NearString.MATCH_SOURCE comesFrom,
boolean fuzzy, int similarity, int similarityNoStem, int simAdjusted, String tmxName,
String creator, long creationDate, String changer, long changedDate,
List<TMXProp> tuProperties) {
// find position for new data
int pos = 0;
for (int i = 0; i < result.size(); i++) {
Expand All @@ -471,7 +471,7 @@ protected void addNearString(final EntryKey key, final String source, final Stri
// multiple project entries.
result.set(i,
NearString.merge(st, key, source, translation, comesFrom, fuzzy, similarity,
similarityNoStem, simAdjusted, similarityData, tmxName, creator, creationDate,
similarityNoStem, simAdjusted, null, tmxName, creator, creationDate,
changer, changedDate, tuProperties));
return;
}
Expand All @@ -498,7 +498,7 @@ protected void addNearString(final EntryKey key, final String source, final Stri

result.add(pos,
new NearString(key, source, translation, comesFrom, fuzzy, similarity, similarityNoStem,
simAdjusted, similarityData, tmxName, creator, creationDate, changer, changedDate,
simAdjusted, null, tmxName, creator, creationDate, changer, changedDate,
tuProperties));
if (result.size() > maxCount) {
result.remove(result.size() - 1);
Expand All @@ -508,11 +508,11 @@ protected void addNearString(final EntryKey key, final String source, final Stri
/*
* Methods for tokenizing strings with caching.
*/
Map<String, Token[]> tokenizeStemCache = new HashMap<String, Token[]>();
Map<String, Token[]> tokenizeNoStemCache = new HashMap<String, Token[]>();
Map<String, Token[]> tokenizeAllCache = new HashMap<String, Token[]>();
Map<String, Token[]> tokenizeStemCache = new HashMap<>();
Map<String, Token[]> tokenizeNoStemCache = new HashMap<>();
Map<String, Token[]> tokenizeAllCache = new HashMap<>();

public Token[] tokenizeStem(String str) {
Token[] tokenizeStem(String str) {
Token[] tokens = tokenizeStemCache.get(str);
if (tokens == null) {
tokens = tok.tokenizeWords(str, ITokenizer.StemmingMode.MATCHING);
Expand All @@ -521,7 +521,7 @@ public Token[] tokenizeStem(String str) {
return tokens;
}

public Token[] tokenizeNoStem(String str) {
Token[] tokenizeNoStem(String str) {
// No-stemming token comparisons are intentionally case-insensitive
// for matching purposes.
str = str.toLowerCase(srcLocale);
Expand All @@ -533,7 +533,7 @@ public Token[] tokenizeNoStem(String str) {
return tokens;
}

public Token[] tokenizeAll(String str) {
Token[] tokenizeAll(String str) {
// Verbatim token comparisons are intentionally case-insensitive
// for matching purposes.
str = str.toLowerCase(srcLocale);
Expand All @@ -545,7 +545,7 @@ public Token[] tokenizeAll(String str) {
return tokens;
}

protected void checkStopped(IStopped stop) throws StoppedException {
private void checkStopped(IStopped stop) throws StoppedException {
if (stop.isStopped()) {
throw new StoppedException();
}
Expand Down

0 comments on commit 1e3b3ce

Please sign in to comment.