refactor: FindMatches

- Make internal methods private
- Update javadoc contents
- Make tokenize*Stem methods package-private
- style: remove final from arguments
- revert haveChanceToAdd logic to noChanceToAdd because all conditions are negated
- Optimize an indexed for-loop

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
omegat-org · Nov 4, 2024 · 1e3b3ce · 1e3b3ce
1 parent ec5af02
commit 1e3b3ce
Showing 1 changed file with 41 additions and 41 deletions.
diff --git a/src/org/omegat/core/statistics/FindMatches.java b/src/org/omegat/core/statistics/FindMatches.java
@@ -87,8 +87,8 @@
 public class FindMatches {
 
     /**
-     * According to gettext source code, PO fuzzies are created above 60%
-     * https://sourceforge.net/p/omegat/feature-requests/1258/
+     * According to gettext source code, PO fuzzy entries are created above 60%.
+     * See <a href="https://sourceforge.net/p/omegat/feature-requests/1258/">RFE#1258</a>.
      */
     static final int PENALTY_FOR_FUZZY = 40;
     private static final int PENALTY_FOR_REMOVED = 5;
@@ -273,19 +273,20 @@ public void iterate(EntryKey source, TMXEntry trans) {
         if (separateSegmentMatcher != null) {
             // split paragraph even when segmentation disabled, then find
             // matches for every segment
-            List<StringBuilder> spaces = new ArrayList<StringBuilder>();
-            List<Rule> brules = new ArrayList<Rule>();
+            List<StringBuilder> spaces = new ArrayList<>();
+            List<Rule> brules = new ArrayList<>();
             Language sourceLang = project.getProjectProperties().getSourceLanguage();
             Language targetLang = project.getProjectProperties().getTargetLanguage();
             List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
             if (segments.size() > 1) {
-                List<String> fsrc = new ArrayList<String>(segments.size());
-                List<String> ftrans = new ArrayList<String>(segments.size());
+                int size = segments.size();
+                List<String> fsrc = new ArrayList<>(size);
+                List<String> ftrans = new ArrayList<>(size);
                 // multiple segments
-                for (short i = 0; i < segments.size(); i++) {
+                for (short i = 0; i < size; i++) {
                     String onesrc = segments.get(i);
 
-                    // find match for separate segment
+                    // find match for a separate segment
                     List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation,
                             false, stop);
                     if (!segmentMatch.isEmpty()
@@ -307,7 +308,7 @@ public void iterate(EntryKey source, TMXEntry trans) {
         }
 
         if (fillSimilarityData) {
-            // fill similarity data only for result
+            // fill similarity data only for a result
             for (NearString near : result) {
                 // fix for bug 1586397
                 byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll,
@@ -320,7 +321,7 @@ public void iterate(EntryKey source, TMXEntry trans) {
     }
 
     /**
-     * Compare one entry with original entry.
+     * Compare one entry with the original entry.
      *
      * @param key
      *            entry to compare
@@ -347,9 +348,9 @@ public void iterate(EntryKey source, TMXEntry trans) {
      * @param props
      *            TMX properties
      */
-    protected void processEntry(EntryKey key, String source, String translation,
-            NearString.MATCH_SOURCE comesFrom, boolean fuzzy, int penalty, String tmxName, String creator,
-            long creationDate, String changer, long changedDate, List<TMXProp> props) {
+    private void processEntry(EntryKey key, String source, String translation, NearString.MATCH_SOURCE comesFrom,
+                              boolean fuzzy, int penalty, String tmxName, String creator, long creationDate,
+                              String changer, long changedDate, List<TMXProp> props) {
         // remove part that is to be removed prior to tokenize
         String realSource = source;
         int realPenaltyForRemoved = 0;
@@ -380,8 +381,8 @@ protected void processEntry(EntryKey key, String source, String translation,
         }
         similarityStem -= realPenaltyForRemoved;
 
-        // check if we have chance by first percentage only
-        if (!haveChanceToAdd(similarityStem, Integer.MAX_VALUE, Integer.MAX_VALUE)) {
+        // check if we have a chance by first percentage only
+        if (noChanceToAdd(similarityStem, Integer.MAX_VALUE, Integer.MAX_VALUE)) {
             return;
         }
 
@@ -395,8 +396,8 @@ protected void processEntry(EntryKey key, String source, String translation,
         }
         similarityNoStem -= realPenaltyForRemoved;
 
-        // check if we have chance by first and second percentages
-        if (!haveChanceToAdd(similarityStem, similarityNoStem, Integer.MAX_VALUE)) {
+        // check if we have a chance by first and second percentages
+        if (noChanceToAdd(similarityStem, similarityNoStem, Integer.MAX_VALUE)) {
             return;
         }
 
@@ -411,7 +412,7 @@ protected void processEntry(EntryKey key, String source, String translation,
         simAdjusted -= realPenaltyForRemoved;
 
         // check if we have chance by first, second and third percentages
-        if (!haveChanceToAdd(similarityStem, similarityNoStem, simAdjusted)) {
+        if (noChanceToAdd(similarityStem, similarityNoStem, simAdjusted)) {
             return;
         }
 
@@ -422,24 +423,24 @@ protected void processEntry(EntryKey key, String source, String translation,
         }
 
         addNearString(key, source, translation, comesFrom, fuzzy, similarityStem, similarityNoStem,
-                simAdjusted, null, tmxName, creator, creationDate, changer, changedDate, props);
+                simAdjusted, tmxName, creator, creationDate, changer, changedDate, props);
     }
 
     /**
-     * Check if entry have a chance to be added to result list. If no, there is
-     * no sense to calculate other parameters.
+     * Check whether an entry has no chance of being added to the result list.
+     * If true, there is no point in calculating the other parameters.
      *
      * @param simStem
      *            similarity with stemming
      * @param simNoStem
      *            similarity without stemming
      * @param simExactly
      *            exactly similarity
-     * @return true if we have chance
+     * @return true if we have no chance.
      */
-    protected boolean haveChanceToAdd(final int simStem, final int simNoStem, final int simExactly) {
+    private boolean noChanceToAdd(int simStem, int simNoStem, int simExactly) {
         if (result.size() < maxCount) {
-            return true;
+            return false;
         }
         NearString st = result.get(result.size() - 1);
         int chance = Integer.compare(st.scores[0].score, simStem);
@@ -449,18 +450,17 @@ protected boolean haveChanceToAdd(final int simStem, final int simNoStem, final
         if (chance == 0) {
             chance = Integer.compare(st.scores[0].adjustedScore, simExactly);
         }
-        return chance != 1;
+        return chance == 1;
     }
 
     /**
-     * Add near string into result list. Near strings sorted by
-     * "similarity,simAdjusted"
+     * Add a near string to the result list. Near strings are sorted by
+     * "similarity, simAdjusted".
      */
-    protected void addNearString(final EntryKey key, final String source, final String translation,
-            NearString.MATCH_SOURCE comesFrom, final boolean fuzzy, final int similarity,
-            final int similarityNoStem, final int simAdjusted, final byte[] similarityData,
-            final String tmxName, final String creator, final long creationDate, final String changer,
-            final long changedDate, final List<TMXProp> tuProperties) {
+    private void addNearString(EntryKey key, String source, String translation, NearString.MATCH_SOURCE comesFrom,
+                               boolean fuzzy, int similarity, int similarityNoStem, int simAdjusted, String tmxName,
+                               String creator, long creationDate, String changer, long changedDate,
+                               List<TMXProp> tuProperties) {
         // find position for new data
         int pos = 0;
         for (int i = 0; i < result.size(); i++) {
@@ -471,7 +471,7 @@ protected void addNearString(final EntryKey key, final String source, final Stri
                 // multiple project entries.
                 result.set(i,
                         NearString.merge(st, key, source, translation, comesFrom, fuzzy, similarity,
-                                similarityNoStem, simAdjusted, similarityData, tmxName, creator, creationDate,
+                                similarityNoStem, simAdjusted, null, tmxName, creator, creationDate,
                                 changer, changedDate, tuProperties));
                 return;
             }
@@ -498,7 +498,7 @@ protected void addNearString(final EntryKey key, final String source, final Stri
 
         result.add(pos,
                 new NearString(key, source, translation, comesFrom, fuzzy, similarity, similarityNoStem,
-                        simAdjusted, similarityData, tmxName, creator, creationDate, changer, changedDate,
+                        simAdjusted, null, tmxName, creator, creationDate, changer, changedDate,
                         tuProperties));
         if (result.size() > maxCount) {
             result.remove(result.size() - 1);
@@ -508,11 +508,11 @@ protected void addNearString(final EntryKey key, final String source, final Stri
     /*
      * Methods for tokenize strings with caching.
      */
-    Map<String, Token[]> tokenizeStemCache = new HashMap<String, Token[]>();
-    Map<String, Token[]> tokenizeNoStemCache = new HashMap<String, Token[]>();
-    Map<String, Token[]> tokenizeAllCache = new HashMap<String, Token[]>();
+    Map<String, Token[]> tokenizeStemCache = new HashMap<>();
+    Map<String, Token[]> tokenizeNoStemCache = new HashMap<>();
+    Map<String, Token[]> tokenizeAllCache = new HashMap<>();
 
-    public Token[] tokenizeStem(String str) {
+    Token[] tokenizeStem(String str) {
         Token[] tokens = tokenizeStemCache.get(str);
         if (tokens == null) {
             tokens = tok.tokenizeWords(str, ITokenizer.StemmingMode.MATCHING);
@@ -521,7 +521,7 @@ public Token[] tokenizeStem(String str) {
         return tokens;
     }
 
-    public Token[] tokenizeNoStem(String str) {
+    Token[] tokenizeNoStem(String str) {
         // No-stemming token comparisons are intentionally case-insensitive
         // for matching purposes.
         str = str.toLowerCase(srcLocale);
@@ -533,7 +533,7 @@ public Token[] tokenizeNoStem(String str) {
         return tokens;
     }
 
-    public Token[] tokenizeAll(String str) {
+    Token[] tokenizeAll(String str) {
         // Verbatim token comparisons are intentionally case-insensitive.
         // for matching purposes.
         str = str.toLowerCase(srcLocale);
@@ -545,7 +545,7 @@ public Token[] tokenizeAll(String str) {
         return tokens;
     }
 
-    protected void checkStopped(IStopped stop) throws StoppedException {
+    private void checkStopped(IStopped stop) throws StoppedException {
         if (stop.isStopped()) {
             throw new StoppedException();
         }