refactor: FindMatches

- use PrepareTMXEntry/TMXEntry to reduce the number of parameters of the private FindMatches#processEntry method

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
omegat-org · Nov 5, 2024 · ff3040d · ff3040d
1 parent 1e3b3ce
commit ff3040d
Show file tree

Hide file tree

Showing 2 changed files with 85 additions and 99 deletions.
diff --git a/config/checkstyle/suppressions.xml b/config/checkstyle/suppressions.xml
@@ -78,8 +78,8 @@
     <suppress files="Platform\.java" checks="ConstantName" lines="61-80"/>
     <!-- util/Preferences -->
     <suppress files="Preferences\.java" checks="LineLength" lines="197"/>
-    <!-- core/stat -->
-    <suppress checks="(ParameterNumber|MethodLength)" files="FindMatches\.java" lines="164,350,459"/>
+    <!-- core/statistics, ignore private FindMatches#addNearString -->
+    <suppress checks="ParameterNumber" files="FindMatches\.java" lines="410-440"/>
     <!-- util/xml -->
     <suppress checks="(EmptyBlock|MethodLength)" files="XMLStreamReader\.java"/>
     <!-- util -->

diff --git a/src/org/omegat/core/statistics/FindMatches.java b/src/org/omegat/core/statistics/FindMatches.java
@@ -42,11 +42,9 @@
 import org.omegat.core.data.ExternalTMFactory;
 import org.omegat.core.data.ExternalTMX;
 import org.omegat.core.data.IProject;
-import org.omegat.core.data.IProject.DefaultTranslationsIterator;
-import org.omegat.core.data.IProject.MultipleTranslationsIterator;
 import org.omegat.core.data.ITMXEntry;
+import org.omegat.core.data.PrepareTMXEntry;
 import org.omegat.core.data.SourceTextEntry;
-import org.omegat.core.data.TMXEntry;
 import org.omegat.core.events.IStopped;
 import org.omegat.core.matching.FuzzyMatcher;
 import org.omegat.core.matching.ISimilarityCalculator;
@@ -88,7 +86,8 @@ public class FindMatches {
 
     /**
      * According to gettext source code, PO fuzzy entries are created above 60%
-     * <a href="https://sourceforge.net/p/omegat/feature-requests/1258/">RFE#1258</a>
+     * <a href=
+     * "https://sourceforge.net/p/omegat/feature-requests/1258/">RFE#1258</a>
      */
     static final int PENALTY_FOR_FUZZY = 40;
     private static final int PENALTY_FOR_REMOVED = 5;
@@ -164,10 +163,8 @@ public FindMatches(IProject project, int maxCount, boolean allowSeparateSegmentM
     public List<NearString> search(String searchText, boolean requiresTranslation, boolean fillSimilarityData,
             IStopped stop) throws StoppedException {
         result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
-
         srcText = searchText;
         removedText = "";
-
         // remove part that is to be removed according to user settings.
         // Rationale: it might be a big string influencing the 'editing
         // distance', while it is not really part
@@ -181,56 +178,48 @@ public List<NearString> search(String searchText, boolean requiresTranslation, b
             srcText = removeMatcher.replaceAll("");
             removedText = removedBuffer.toString();
         }
-
-        // get tokens for original string
+        // get tokens for original string which includes non-word tokens
         strTokensStem = tokenizeStem(srcText);
         strTokensNoStem = tokenizeNoStem(srcText);
         strTokensAll = tokenizeAll(srcText);
-        /* HP: includes non - word tokens */
 
         // travel by project entries, including orphaned
         if (project.getProjectProperties().isSupportDefaultTranslations()) {
-            project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {
-                public void iterate(String source, TMXEntry trans) {
-                    checkStopped(stop);
-                    if (!searchExactlyTheSame && source.equals(searchText)) {
-                        // skip original==original entry comparison
-                        return;
-                    }
-                    if (requiresTranslation && trans.translation == null) {
-                        return;
-                    }
-                    String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
-                    processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0,
-                            fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate,
-                            null);
-                }
-            });
-        }
-        project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {
-            public void iterate(EntryKey source, TMXEntry trans) {
+            project.iterateByDefaultTranslations((source, trans) -> {
                 checkStopped(stop);
-                if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
+                if (!searchExactlyTheSame && source.equals(searchText)) {
                     // skip original==original entry comparison
                     return;
                 }
                 if (requiresTranslation && trans.translation == null) {
                     return;
                 }
                 String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
-                processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY,
-                        false, 0, fileName, trans.creator, trans.creationDate, trans.changer,
-                        trans.changeDate, null);
+                PrepareTMXEntry entry = new PrepareTMXEntry(trans);
+                entry.source = source;
+                processEntry(null, entry, fileName, NearString.MATCH_SOURCE.MEMORY, false, 0);
+            });
+        }
+        project.iterateByMultipleTranslations((source, trans) -> {
+            checkStopped(stop);
+            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
+                // skip original==original entry comparison
+                return;
+            }
+            if (requiresTranslation && trans.translation == null) {
+                return;
             }
+            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
+            PrepareTMXEntry entry = new PrepareTMXEntry(trans);
+            entry.source = source.sourceText;
+            processEntry(source, entry, fileName, NearString.MATCH_SOURCE.MEMORY, false, 0);
         });
-
         /*
          * Penalty applied for fuzzy matches in another language (if no match in
          * the target language was found).
          */
         int foreignPenalty = Preferences.getPreferenceDefault(Preferences.PENALTY_FOR_FOREIGN_MATCHES,
                 Preferences.PENALTY_FOR_FOREIGN_MATCHES_DEFAULT);
-
         // travel by translation memories
         for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
             int penalty = 0;
@@ -248,28 +237,28 @@ public void iterate(EntryKey source, TMXEntry trans) {
                 if (requiresTranslation && tmen.getTranslationText() == null) {
                     continue;
                 }
-
                 int tmenPenalty = penalty;
                 if (tmen.hasPropValue(ExternalTMFactory.TMXLoader.PROP_FOREIGN_MATCH, "true")) {
                     tmenPenalty += foreignPenalty;
                 }
-
-                processEntry(null, tmen.getSourceText(), tmen.getTranslationText(),
-                        NearString.MATCH_SOURCE.TM, false, tmenPenalty, en.getKey(), tmen.getCreator(),
-                        tmen.getCreationDate(), tmen.getChanger(), tmen.getChangeDate(), tmen.getProperties());
+                processEntry(null, tmen, en.getKey(), NearString.MATCH_SOURCE.TM, false, tmenPenalty);
             }
         }
-
         // travel by all entries for check source file translations
         for (SourceTextEntry ste : project.getAllEntries()) {
             checkStopped(stop);
             if (ste.getSourceTranslation() != null) {
-                processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(),
-                        NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file,
-                        "", 0, "", 0, null);
+                PrepareTMXEntry entry = new PrepareTMXEntry();
+                entry.source = ste.getSrcText();
+                entry.translation = ste.getSourceTranslation();
+                entry.creator = "";
+                entry.changer = "";
+                entry.creationDate = 0;
+                entry.changeDate = 0;
+                processEntry(ste.getKey(), entry, ste.getKey().file, NearString.MATCH_SOURCE.MEMORY,
+                        ste.isSourceTranslationFuzzy(), 0);
             }
         }
-
         if (separateSegmentMatcher != null) {
             // split paragraph even when segmentation disabled, then find
             // matches for every segment
@@ -279,13 +268,10 @@ public void iterate(EntryKey source, TMXEntry trans) {
             Language targetLang = project.getProjectProperties().getTargetLanguage();
             List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
             if (segments.size() > 1) {
-                int size = segments.size();
-                List<String> fsrc = new ArrayList<>(size);
-                List<String> ftrans = new ArrayList<>(size);
+                List<String> fsrc = new ArrayList<>(segments.size());
+                List<String> ftrans = new ArrayList<>(segments.size());
                 // multiple segments
-                for (short i = 0; i < size; i++) {
-                    String onesrc = segments.get(i);
-
+                for (String onesrc : segments) {
                     // find match for a separate segment
                     List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation,
                             false, stop);
@@ -298,25 +284,23 @@ public void iterate(EntryKey source, TMXEntry trans) {
                         ftrans.add("");
                     }
                 }
-                // glue found sources
-                String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
-                // glue found translations
-                String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
-                processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "",
-                        0, null);
+                // glue found sources and translations
+                PrepareTMXEntry entry = new PrepareTMXEntry();
+                entry.source = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
+                entry.translation = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
+                entry.creator = "";
+                entry.changer = "";
+                entry.creationDate = 0;
+                entry.changeDate = 0;
+                processEntry(null, entry, "", NearString.MATCH_SOURCE.TM, false, 0);
             }
         }
-
+        // fill similarity data only for a result
         if (fillSimilarityData) {
-            // fill similarity data only for a result
             for (NearString near : result) {
-                // fix for bug 1586397
-                byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll,
-                        tokenizeAll(near.source));
-                near.attr = similarityData;
+                near.attr = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
             }
         }
-
         return result;
     }
 
@@ -325,10 +309,8 @@ public void iterate(EntryKey source, TMXEntry trans) {
      *
      * @param key
      *            entry to compare
-     * @param source
-     *            source text
-     * @param translation
-     *            translation text
+     * @param entry
+     *            PrepareTMXEntry entry to process.
      * @param comesFrom
      *            match source
      * @param fuzzy
@@ -337,22 +319,11 @@ public void iterate(EntryKey source, TMXEntry trans) {
      *            penalty score
      * @param tmxName
      *            tmx name
-     * @param creator
-     *            translation creator
-     * @param creationDate
-     *            creation date of translation
-     * @param changer
-     *            last editor name
-     * @param changedDate
-     *            last change date
-     * @param props
-     *            TMX properties
      */
-    private void processEntry(EntryKey key, String source, String translation, NearString.MATCH_SOURCE comesFrom,
-                              boolean fuzzy, int penalty, String tmxName, String creator, long creationDate,
-                              String changer, long changedDate, List<TMXProp> props) {
+    public void processEntry(EntryKey key, ITMXEntry entry, String tmxName,
+                              NearString.MATCH_SOURCE comesFrom, boolean fuzzy, int penalty) {
         // remove part that is to be removed prior to tokenize
-        String realSource = source;
+        String realSource = entry.getSourceText();
         int realPenaltyForRemoved = 0;
         if (removePattern != null) {
             StringBuilder entryRemovedText = new StringBuilder();
@@ -422,13 +393,12 @@ private void processEntry(EntryKey key, String source, String translation, NearS
             return;
         }
 
-        addNearString(key, source, translation, comesFrom, fuzzy, similarityStem, similarityNoStem,
-                simAdjusted, tmxName, creator, creationDate, changer, changedDate, props);
+        addNearString(key, entry, comesFrom, fuzzy, similarityStem, similarityNoStem, simAdjusted, tmxName);
     }
 
     /**
-     * Check if entries have a chance to be added to a result list.
-     * If true, there is no sense to calculate other parameters.
+     * Check whether an entry has no chance of being added to the result list.
+     * If true, there is no point in calculating the other parameters.
      *
      * @param simStem
      *            similarity with stemming
@@ -454,25 +424,29 @@ private boolean noChanceToAdd(int simStem, int simNoStem, int simExactly) {
     }
 
     /**
-     * Add near string into the result list. Near strings sorted by
-     * "similarity, simAdjusted"
+     * Add near string into the result list. Near strings sorted by "similarity,
+     * simAdjusted"
      */
-    private void addNearString(EntryKey key, String source, String translation, NearString.MATCH_SOURCE comesFrom,
-                               boolean fuzzy, int similarity, int similarityNoStem, int simAdjusted, String tmxName,
-                               String creator, long creationDate, String changer, long changedDate,
-                               List<TMXProp> tuProperties) {
+    private void addNearString(EntryKey key, ITMXEntry entry, NearString.MATCH_SOURCE comesFrom, boolean fuzzy,
+                               int similarity, int similarityNoStem, int simAdjusted, String tmxName) {
+        final String source = entry.getSourceText();
+        final String translation = entry.getTranslationText();
+        final String creator = entry.getCreator();
+        final long creationDate = entry.getCreationDate();
+        final String changer = entry.getChanger();
+        final long changedDate =  entry.getChangeDate();
+        final List<TMXProp> tuProperties = entry.getProperties();
         // find position for new data
         int pos = 0;
         for (int i = 0; i < result.size(); i++) {
             NearString st = result.get(i);
             if (source.equals(st.source) && Objects.equals(translation, st.translation)) {
                 // Consolidate identical matches from different sources into a
-                // single NearString with
-                // multiple project entries.
+                // single NearString with multiple project entries.
                 result.set(i,
                         NearString.merge(st, key, source, translation, comesFrom, fuzzy, similarity,
-                                similarityNoStem, simAdjusted, null, tmxName, creator, creationDate,
-                                changer, changedDate, tuProperties));
+                                similarityNoStem, simAdjusted, null, tmxName, creator, creationDate, changer,
+                                changedDate, tuProperties));
                 return;
             }
             if (st.scores[0].score < similarity) {
@@ -552,10 +526,22 @@ private void checkStopped(IStopped stop) throws StoppedException {
     }
 
     /**
-     * Process will throw this exception if it stopped.All callers must catch it
-     * and just skip.
+     * The process throws this exception when it is stopped. All callers must
+     * catch it and just skip.
      */
     @SuppressWarnings("serial")
     public static class StoppedException extends RuntimeException {
     }
+
+    static class Similarity { // plain mutable holder for three similarity scores; NOTE(review): not referenced in the visible hunks - confirm where it is used
+        int similarity; // primary similarity score; presumably the stemmed score, matching addNearString's "similarity" parameter - TODO confirm
+        int similarityNoStem; // presumably the score computed without stemming (same name as addNearString's parameter) - TODO confirm
+        int simAdjusted; // adjusted similarity score (same name as addNearString's simAdjusted parameter)
+
+        Similarity(int sim, int simNoStem, int simAdj) { // stores the three scores exactly as given; no validation is performed
+            similarity = sim;
+            similarityNoStem = simNoStem;
+            simAdjusted = simAdj;
+        }
+    }
 }