From 8d736ece14f2993174f97e80257b6dbe4a6157e9 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 10 Mar 2021 11:40:20 +0100 Subject: [PATCH] Fix GeoIpDownloaderIT#testUseGeoIpProcessorWithDownloadedDBs(...) test The test failure looks legit, because there is a possibility that the same databases was downloaded twice. See added comment in DatabaseRegistry class. Relates to #69972 --- .../ingest/geoip/GeoIpDownloaderIT.java | 3 ++- .../ingest/geoip/DatabaseRegistry.java | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java index 13035df1106e2..9ad00c6c223c2 100644 --- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java @@ -33,6 +33,7 @@ import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; +import org.elasticsearch.test.junit.annotations.TestLogging; import org.junit.After; import java.io.BufferedOutputStream; @@ -143,7 +144,7 @@ public void testGeoIpDatabasesDownload() throws Exception { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/69972") + @TestLogging(value = "org.elasticsearch.ingest.geoip:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/69972") public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception { // setup: BytesReference bytes; diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseRegistry.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseRegistry.java index 48133f0f5f2be..d8bcf562962c6 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseRegistry.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseRegistry.java @@ -230,6 +230,19 @@ void retrieveAndUpdateDatabase(String databaseName, GeoIpTaskState.Metadata meta return; } + // 2 types of threads: + // 1) The thread that checks whether database should be retrieved / updated and creates (^) tmp file (cluster state applied thread) + // 2) the thread that downloads the db file, updates the databases map and then removes the tmp file + // Thread 2 may have updated the databases map after thread 1 detects that there is no entry (or md5 mismatch) for a database. + // If thread 2 then also removes the tmp file before thread 1 attempts to create it then we're about to retrieve the same database + // twice. This check is here to avoid this: + DatabaseReaderLazyLoader lazyLoader = databases.get(databaseName); + if (lazyLoader != null && recordedMd5.equals(lazyLoader.getMd5())) { + LOGGER.debug("deleting tmp file because database [{}] has already been updated.", databaseName); + Files.delete(databaseTmpGzFile); + return; + } + final Path databaseTmpFile = Files.createFile(geoipTmpDirectory.resolve(databaseName + ".tmp")); LOGGER.info("downloading geoip database [{}] to [{}]", databaseName, databaseTmpGzFile); retrieveDatabase( @@ -250,8 +263,8 @@ void retrieveAndUpdateDatabase(String databaseName, GeoIpTaskState.Metadata meta failure -> { LOGGER.error((Supplier) () -> new ParameterizedMessage("failed to download database [{}]", databaseName), failure); try { - Files.delete(databaseTmpFile); - Files.delete(databaseTmpGzFile); + Files.deleteIfExists(databaseTmpFile); + Files.deleteIfExists(databaseTmpGzFile); } catch (IOException ioe) { ioe.addSuppressed(failure); LOGGER.error("Unable to delete tmp database file after failure", ioe);