diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java index e65eed33b..96953506d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java @@ -99,11 +99,21 @@ public SentenceDetectorME(String language) throws IOException { * @param model the {@link SentenceModel} */ public SentenceDetectorME(SentenceModel model) { - SentenceDetectorFactory sdFactory = model.getFactory(); + this(model, model.getAbbreviations()); + } + + /** + * Instantiates a {@link SentenceDetectorME} with an existing {@link SentenceModel}. + * + * @param model The {@link SentenceModel} to be used. + * @param abbDict The {@link Dictionary} to be used. It must fit the language of the {@code model}. + */ + public SentenceDetectorME(SentenceModel model, Dictionary abbDict) { this.model = model.getMaxentModel(); + this.abbDict = abbDict; + SentenceDetectorFactory sdFactory = model.getFactory(); cgen = sdFactory.getSDContextGenerator(); scanner = sdFactory.getEndOfSentenceScanner(); - abbDict = model.getAbbreviations(); useTokenEnd = sdFactory.isUseTokenEnd(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java index 17ea14e87..7706cfa83 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java @@ -18,6 +18,7 @@ package opennlp.tools.sentdetect; import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.dictionary.Dictionary; import opennlp.tools.util.Span; /** @@ -37,20 +38,31 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable { private final SentenceModel model; + private final Dictionary abbDict; private final ThreadLocal threadLocal = new ThreadLocal<>(); public ThreadSafeSentenceDetectorME(SentenceModel model) { - super(); + this(model, model.getAbbreviations()); + } + + /** + * Instantiates a {@link ThreadSafeSentenceDetectorME} with an existing {@link SentenceModel}. + * + * @param model The {@link SentenceModel} to be used. + * @param abbDict The {@link Dictionary} to be used. It must fit the language of the {@code model}. + */ + public ThreadSafeSentenceDetectorME(SentenceModel model, Dictionary abbDict) { this.model = model; + this.abbDict = abbDict; } // If a thread-local version exists, return it. Otherwise, create, then return. private SentenceDetectorME getSD() { SentenceDetectorME sd = threadLocal.get(); if (sd == null) { - sd = new SentenceDetectorME(model); + sd = new SentenceDetectorME(model, abbDict); threadLocal.set(sd); } return sd; diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java index 3ebbd1e36..13de7bc5b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java @@ -18,6 +18,7 @@ package opennlp.tools.tokenize; import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.dictionary.Dictionary; import opennlp.tools.util.Span; /** @@ -37,18 +38,29 @@ public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable { private final TokenizerModel model; - + private final Dictionary abbDict; + private final ThreadLocal threadLocal = new ThreadLocal<>(); public ThreadSafeTokenizerME(TokenizerModel model) { - super(); + this(model, model.getAbbreviations()); + } + + /** + * Instantiates a {@link ThreadSafeTokenizerME} with an existing {@link TokenizerModel}. + * + * @param model The {@link TokenizerModel} to be used. + * @param abbDict The {@link Dictionary} to be used. It must fit the language of the {@code model}. + */ + public ThreadSafeTokenizerME(TokenizerModel model, Dictionary abbDict) { this.model = model; + this.abbDict = abbDict; } private TokenizerME getTokenizer() { TokenizerME tokenizer = threadLocal.get(); if (tokenizer == null) { - tokenizer = new TokenizerME(model); + tokenizer = new TokenizerME(model, abbDict); threadLocal.set(tokenizer); } return tokenizer; diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java index 1c88f84b9..ee0d82675 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java @@ -129,13 +129,23 @@ public TokenizerME(String language) throws IOException { * @param model The {@link TokenizerModel} to be used. */ public TokenizerME(TokenizerModel model) { + this(model, model.getAbbreviations()); + } + + /** + * Instantiates a {@link TokenizerME} with an existing {@link TokenizerModel}. + * + * @param model The {@link TokenizerModel} to be used. + * @param abbDict The {@link Dictionary} to be used. It must fit the language of the {@code model}. + */ + public TokenizerME(TokenizerModel model, Dictionary abbDict) { + this.model = model.getMaxentModel(); + this.abbDict = abbDict; TokenizerFactory factory = model.getFactory(); - this.alphanumeric = factory.getAlphaNumericPattern(); this.cg = factory.getContextGenerator(); - this.model = model.getMaxentModel(); + this.alphanumeric = factory.getAlphaNumericPattern(); this.useAlphaNumericOptimization = factory.isUseAlphaNumericOptimization(); - abbDict = model.getAbbreviations(); newTokens = new ArrayList<>(); tokProbs = new ArrayList<>(50); }