Skip to content

Commit 25fa67a

Browse files
committed
OPENNLP-1661 Fix custom models being wiped from OpenNLP user.home directory
- deletes AbstractDownloadUtilTest.java removing historical code that wiped models - adds package-private DownloadUtil#existsModel(..) method to check models for certain language exist locally - adds to and adjusts related test classes
1 parent 3cf8b91 commit 25fa67a

File tree

4 files changed

+81
-103
lines changed

4 files changed

+81
-103
lines changed

opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java

+42-8
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.net.URL;
2626
import java.nio.charset.StandardCharsets;
2727
import java.nio.file.Files;
28+
import java.nio.file.NoSuchFileException;
2829
import java.nio.file.Path;
2930
import java.nio.file.Paths;
3031
import java.nio.file.StandardCopyOption;
@@ -80,6 +81,41 @@ public enum ModelType {
8081

8182
private static Map<String, Map<ModelType, String>> availableModels;
8283

84+
/**
85+
* Checks if a model of the specified {@code modelType} has been downloaded already
86+
* for a particular {@code language}.
87+
*
88+
* @param language The ISO language code of the requested model.
89+
* @param modelType The {@link DownloadUtil.ModelType type} of model.
90+
* @return {@code true} if a model exists locally, {@code false} otherwise.
91+
* @throws IOException Thrown if IO errors occurred or the model is invalid.
92+
*/
93+
static boolean existsModel(String language, ModelType modelType) throws IOException {
94+
Map<ModelType, String> modelsByLanguage = getAvailableModels().get(language);
95+
if (modelsByLanguage == null) {
96+
return false;
97+
} else {
98+
final String url = modelsByLanguage.get(modelType);
99+
if (url != null) {
100+
final Path homeDirectory = Paths.get(System.getProperty("OPENNLP_DOWNLOAD_HOME",
101+
System.getProperty("user.home"))).resolve(".opennlp");
102+
final String filename = url.substring(url.lastIndexOf("/") + 1);
103+
final Path localFile = Paths.get(homeDirectory.toString(), filename);
104+
boolean exists;
105+
try {
106+
// if this does not throw the requested model exists AND is valid!
107+
validateModel(new URL(url + ".sha512"), localFile);
108+
exists = true;
109+
} catch (NoSuchFileException e) {
110+
exists = false;
111+
}
112+
return exists;
113+
} else {
114+
return false;
115+
}
116+
}
117+
}
118+
83119
/**
84120
* Triggers a download for the specified {@link DownloadUtil.ModelType}.
85121
*
@@ -94,7 +130,7 @@ public static <T extends BaseModel> T downloadModel(String language, ModelType m
94130
Class<T> type) throws IOException {
95131

96132
if (getAvailableModels().containsKey(language)) {
97-
final String url = (getAvailableModels().get(language).get(modelType));
133+
final String url = getAvailableModels().get(language).get(modelType);
98134
if (url != null) {
99135
return downloadModel(new URL(url), type);
100136
}
@@ -134,17 +170,14 @@ public static <T extends BaseModel> T downloadModel(URL url, Class<T> type) thro
134170
final Path localFile = Paths.get(homeDirectory.toString(), filename);
135171

136172
if (!Files.exists(localFile)) {
137-
logger.debug("Downloading model from {} to {}.", url, localFile);
173+
logger.debug("Downloading model to {}.", localFile);
138174

139175
try (final InputStream in = url.openStream()) {
140176
Files.copy(in, localFile, StandardCopyOption.REPLACE_EXISTING);
141177
}
142-
143178
validateModel(new URL(url + ".sha512"), localFile);
144-
145179
logger.debug("Download complete.");
146180
} else {
147-
System.out.println("Model file already exists. Skipping download.");
148181
logger.debug("Model file '{}' already exists. Skipping download.", filename);
149182
}
150183

@@ -167,7 +200,7 @@ public static Map<String, Map<ModelType, String>> getAvailableModels() {
167200
}
168201

169202
/**
170-
* Validates the downloaded model.
203+
* Validates a downloaded model via the specified {@link Path downloadedModel path}.
171204
*
172205
* @param sha512 the url to get the sha512 hash
173206
* @param downloadedModel the model file to check
@@ -187,8 +220,8 @@ private static void validateModel(URL sha512, Path downloadedModel) throws IOExc
187220
// Validate SHA512 checksum
188221
final String actualChecksum = calculateSHA512(downloadedModel);
189222
if (!actualChecksum.equalsIgnoreCase(expectedChecksum)) {
190-
throw new IOException("SHA512 checksum validation failed. Expected: "
191-
+ expectedChecksum + ", but got: " + actualChecksum);
223+
throw new IOException("SHA512 checksum validation failed for " + downloadedModel.getFileName() +
224+
". Expected: " + expectedChecksum + ", but got: " + actualChecksum);
192225
}
193226
}
194227

@@ -198,6 +231,7 @@ private static String calculateSHA512(Path file) throws IOException {
198231
try (InputStream fis = Files.newInputStream(file);
199232
DigestInputStream dis = new DigestInputStream(fis, digest)) {
200233
byte[] buffer = new byte[4096];
234+
//noinspection StatementWithEmptyBody
201235
while (dis.read(buffer) != -1) {
202236
// Reading the file to update the digest
203237
}

opennlp-tools/src/test/java/opennlp/tools/util/AbstractDownloadUtilTest.java

-79
This file was deleted.

opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilDownloadTwiceTest.java

+15-15
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import static org.junit.jupiter.api.Assertions.assertEquals;
3636

3737
@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
38-
public class DownloadUtilDownloadTwiceTest extends AbstractDownloadUtilTest {
38+
public class DownloadUtilDownloadTwiceTest {
3939

4040
/*
4141
* Programmatic change to debug log to ensure that we can see log messages to
@@ -60,24 +60,24 @@ public static void cleanup() {
6060

6161
@Test
6262
public void testDownloadModelTwice() throws IOException {
63+
String lang = "de";
64+
DownloadUtil.ModelType type = DownloadUtil.ModelType.SENTENCE_DETECTOR;
65+
6366
try (LogCaptor logCaptor = LogCaptor.forClass(DownloadUtil.class)) {
64-
65-
DownloadUtil.downloadModel("de",
66-
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
67-
68-
assertEquals(2, logCaptor.getDebugLogs().size());
69-
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "Download complete.");
67+
boolean alreadyDownloaded = DownloadUtil.existsModel(lang, type);
68+
DownloadUtil.downloadModel(lang, type, SentenceModel.class);
69+
70+
if (! alreadyDownloaded) {
71+
assertEquals(2, logCaptor.getDebugLogs().size());
72+
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "Download complete.");
73+
} else {
74+
assertEquals(1, logCaptor.getDebugLogs().size());
75+
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already exists. Skipping download.");
76+
}
7077
logCaptor.clearLogs();
7178

7279
// try to download again
73-
DownloadUtil.downloadModel("de",
74-
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
75-
assertEquals(1, logCaptor.getDebugLogs().size());
76-
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already exists. Skipping download.");
77-
logCaptor.clearLogs();
78-
79-
DownloadUtil.downloadModel("de",
80-
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
80+
DownloadUtil.downloadModel(lang, type, SentenceModel.class);
8181
assertEquals(1, logCaptor.getDebugLogs().size());
8282
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already exists. Skipping download.");
8383
logCaptor.clearLogs();

opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java

+24-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.net.URL;
2222
import java.util.stream.Stream;
2323

24+
import org.junit.jupiter.api.Test;
2425
import org.junit.jupiter.params.ParameterizedTest;
2526
import org.junit.jupiter.params.provider.Arguments;
2627
import org.junit.jupiter.params.provider.MethodSource;
@@ -32,11 +33,12 @@
3233
import opennlp.tools.tokenize.TokenizerModel;
3334

3435
import static org.junit.jupiter.api.Assertions.assertEquals;
36+
import static org.junit.jupiter.api.Assertions.assertFalse;
3537
import static org.junit.jupiter.api.Assertions.assertNotNull;
3638
import static org.junit.jupiter.api.Assertions.assertThrows;
3739
import static org.junit.jupiter.api.Assertions.assertTrue;
3840

39-
public class DownloadUtilTest extends AbstractDownloadUtilTest {
41+
public class DownloadUtilTest {
4042

4143
@ParameterizedTest(name = "Verify \"{0}\" sentence model")
4244
@ValueSource(strings = {"en", "fr", "de", "it", "nl", "bg", "ca", "cs", "da", "el",
@@ -62,6 +64,27 @@ public void testDownloadModelByURL(String language, URL url) throws IOException
6264
assertTrue(model.isLoadedFromSerialized());
6365
}
6466

67+
@Test
68+
@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
69+
public void testExistsModel() throws IOException {
70+
final String lang = "en";
71+
final DownloadUtil.ModelType type = DownloadUtil.ModelType.SENTENCE_DETECTOR;
72+
// Prepare
73+
SentenceModel model = DownloadUtil.downloadModel(lang, type, SentenceModel.class);
74+
assertNotNull(model);
75+
assertEquals(lang, model.getLanguage());
76+
// Test
77+
assertTrue(DownloadUtil.existsModel(lang, type));
78+
}
79+
80+
@ParameterizedTest
81+
@NullAndEmptySource
82+
@ValueSource(strings = {"xy", "\t", "\n"})
83+
@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
84+
public void testExistsModelInvalid(String input) throws IOException {
85+
assertFalse(DownloadUtil.existsModel(input, DownloadUtil.ModelType.SENTENCE_DETECTOR));
86+
}
87+
6588
@ParameterizedTest(name = "Detect invalid input: \"{0}\"")
6689
@NullAndEmptySource
6790
@ValueSource(strings = {" ", "\t", "\n"})

0 commit comments

Comments
 (0)