Skip to content

Commit 050f00b

Browse files
Add DatabaseRegistry for locally managing databases managed by GeoIpDownloader (#69971)
Backport of #69540 to 7.x branch. This component is responsible for making the databases maintained by GeoIpDownloader available for ingest processors. Also provided a lookup mechanism for geoip processors with fallback to {@link LocalDatabases}. All databases are downloaded into a geoip tmp directory, which is created at node startup. The following high level steps are executed after each cluster state update: 1) Check which databases are available in {@link GeoIpTaskState}, which is part of the geoip downloader persistent task. 2) For each database check whether the databases have changed by comparing the local and remote md5 hash or are locally missing. 3) For each database identified in step 2 start downloading the database chunks. Each chunks is appended to a tmp file (inside geoip tmp dir) and after all chunks have been downloaded, the database is uncompressed and renamed to the final filename. After this the database is loaded and if there is an old instance of this database then that is closed. 4) Cleanup locally loaded databases that are no longer mentioned in {@link GeoIpTaskState}. Relates to #68920 Other cherry-picked commits: * Fix ReloadingDatabasesWhilePerformingGeoLookupsIT (#70163) Wait for ingest threads to stop using the DatabaseReaderLazyLoader, so the during the next run the db update thread doesn't try to remove the db again (because the file hasn't yet been deleted). Also delete tmp dirs this test create at the end of the test, so that when repeating this test many times, this test doesn't accumulate many directories with database files. Closes #69980 * Fix clean up of old entries in DatabaseRegistry.initialize (#70135) This change switches clean up in DatabaseRegistry.initialize from using Files.walk and stream operations to Files.walkFileTree which can be made more robust in case of errors * Fix DatabaseRegistryTests (#70180) This test predefined expected md5 hashes in constants, that were expected with java15. However java16 creates different md5 hashes and so the expected md5 hashes don't match with the actual md5 hashes, which caused tests in this test suite to fail (running with java16 only). The tests now generates the expected md5 hash during the test instead of using predefined constants. Closes #69986 * Fix GeoIpDownloaderIT#testUseGeoIpProcessorWithDownloadedDBs(...) test (#70215) The test failure looks legit, because there is a possibility that the same databases was downloaded twice. See added comment in DatabaseRegistry class. Relates to #69972 * Muted GeoIpDownloaderIT#testUseGeoIpProcessorWithDownloadedDBs(...) test, see #69972 Co-authored-by: Przemko Robakowski <przemko.robakowski@elastic.co>
1 parent 369ee14 commit 050f00b

File tree

12 files changed

+1131
-183
lines changed

12 files changed

+1131
-183
lines changed

modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,18 @@
1111
import com.maxmind.geoip2.DatabaseReader;
1212
import org.apache.lucene.search.TotalHits;
1313
import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
14+
import org.elasticsearch.action.ingest.SimulateDocumentBaseResult;
15+
import org.elasticsearch.action.ingest.SimulatePipelineRequest;
16+
import org.elasticsearch.action.ingest.SimulatePipelineResponse;
1417
import org.elasticsearch.action.search.SearchResponse;
1518
import org.elasticsearch.common.SuppressForbidden;
1619
import org.elasticsearch.common.collect.Set;
20+
import org.elasticsearch.common.bytes.BytesReference;
1721
import org.elasticsearch.common.settings.Settings;
22+
import org.elasticsearch.common.xcontent.XContentBuilder;
23+
import org.elasticsearch.common.xcontent.XContentType;
24+
import org.elasticsearch.common.xcontent.json.JsonXContent;
25+
import org.elasticsearch.env.Environment;
1826
import org.elasticsearch.index.query.BoolQueryBuilder;
1927
import org.elasticsearch.index.query.MatchQueryBuilder;
2028
import org.elasticsearch.index.query.RangeQueryBuilder;
@@ -24,6 +32,7 @@
2432
import org.elasticsearch.search.sort.SortOrder;
2533
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
2634
import org.elasticsearch.test.ESIntegTestCase.Scope;
35+
import org.elasticsearch.test.junit.annotations.TestLogging;
2736
import org.junit.After;
2837

2938
import java.io.BufferedOutputStream;
@@ -37,11 +46,20 @@
3746
import java.util.Iterator;
3847
import java.util.List;
3948
import java.util.concurrent.TimeUnit;
49+
import java.util.stream.Collectors;
50+
import java.util.stream.Stream;
51+
import java.util.stream.StreamSupport;
4052
import java.util.zip.GZIPInputStream;
4153

4254
import static java.nio.file.StandardOpenOption.CREATE;
4355
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
4456
import static java.nio.file.StandardOpenOption.WRITE;
57+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
58+
import static org.hamcrest.Matchers.containsInAnyOrder;
59+
import static org.hamcrest.Matchers.empty;
60+
import static org.hamcrest.Matchers.endsWith;
61+
import static org.hamcrest.Matchers.equalTo;
62+
import static org.hamcrest.Matchers.is;
4563

4664
@ClusterScope(scope = Scope.TEST, maxNumDataNodes = 1)
4765
public class GeoIpDownloaderIT extends AbstractGeoIpIT {
@@ -121,6 +139,132 @@ public void testGeoIpDatabasesDownload() throws Exception {
121139
}
122140
}
123141

142+
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/69972")
143+
@TestLogging(value = "org.elasticsearch.ingest.geoip:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/69972")
144+
public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
145+
// setup:
146+
BytesReference bytes;
147+
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
148+
builder.startObject();
149+
{
150+
builder.startArray("processors");
151+
{
152+
builder.startObject();
153+
{
154+
builder.startObject("geoip");
155+
{
156+
builder.field("field", "ip");
157+
builder.field("target_field", "ip-city");
158+
builder.field("database_file", "GeoLite2-City.mmdb");
159+
}
160+
builder.endObject();
161+
}
162+
builder.endObject();
163+
builder.startObject();
164+
{
165+
builder.startObject("geoip");
166+
{
167+
builder.field("field", "ip");
168+
builder.field("target_field", "ip-country");
169+
builder.field("database_file", "GeoLite2-Country.mmdb");
170+
}
171+
builder.endObject();
172+
}
173+
builder.endObject();
174+
builder.startObject();
175+
{
176+
builder.startObject("geoip");
177+
{
178+
builder.field("field", "ip");
179+
builder.field("target_field", "ip-asn");
180+
builder.field("database_file", "GeoLite2-ASN.mmdb");
181+
}
182+
builder.endObject();
183+
}
184+
builder.endObject();
185+
}
186+
builder.endArray();
187+
}
188+
builder.endObject();
189+
bytes = BytesReference.bytes(builder);
190+
}
191+
assertAcked(client().admin().cluster().preparePutPipeline("_id", bytes, XContentType.JSON).get());
192+
193+
// verify before updating dbs
194+
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
195+
builder.startObject();
196+
builder.startArray("docs");
197+
{
198+
builder.startObject();
199+
builder.field("_index", "my-index");
200+
{
201+
builder.startObject("_source");
202+
builder.field("ip", "89.160.20.128");
203+
builder.endObject();
204+
}
205+
builder.endObject();
206+
}
207+
builder.endArray();
208+
builder.endObject();
209+
bytes = BytesReference.bytes(builder);
210+
}
211+
SimulatePipelineRequest simulateRequest = new SimulatePipelineRequest(bytes, XContentType.JSON);
212+
simulateRequest.setId("_id");
213+
{
214+
SimulatePipelineResponse simulateResponse = client().admin().cluster().simulatePipeline(simulateRequest).actionGet();
215+
assertThat(simulateResponse.getPipelineId(), equalTo("_id"));
216+
assertThat(simulateResponse.getResults().size(), equalTo(1));
217+
SimulateDocumentBaseResult result = (SimulateDocumentBaseResult) simulateResponse.getResults().get(0);
218+
assertThat(result.getIngestDocument().getFieldValue("ip-city.city_name", String.class), equalTo("Tumba"));
219+
assertThat(result.getIngestDocument().getFieldValue("ip-asn.organization_name", String.class), equalTo("Bredband2 AB"));
220+
assertThat(result.getIngestDocument().getFieldValue("ip-country.country_name", String.class), equalTo("Sweden"));
221+
}
222+
223+
// Enable downloader:
224+
Settings.Builder settings = Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true);
225+
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings));
226+
227+
final List<Path> geoipTmpDirs = StreamSupport.stream(internalCluster().getInstances(Environment.class).spliterator(), false)
228+
.map(env -> {
229+
Path geoipTmpDir = env.tmpFile().resolve("geoip-databases");
230+
assertThat(Files.exists(geoipTmpDir), is(true));
231+
return geoipTmpDir;
232+
}).collect(Collectors.toList());
233+
assertBusy(() -> {
234+
for (Path geoipTmpDir : geoipTmpDirs) {
235+
try (Stream<Path> list = Files.list(geoipTmpDir)) {
236+
List<String> files = list.map(Path::toString).collect(Collectors.toList());
237+
assertThat(files, containsInAnyOrder(endsWith("GeoLite2-City.mmdb"), endsWith("GeoLite2-Country.mmdb"),
238+
endsWith("GeoLite2-ASN.mmdb")));
239+
}
240+
}
241+
});
242+
243+
// Verify after updating dbs:
244+
assertBusy(() -> {
245+
SimulatePipelineResponse simulateResponse = client().admin().cluster().simulatePipeline(simulateRequest).actionGet();
246+
assertThat(simulateResponse.getPipelineId(), equalTo("_id"));
247+
assertThat(simulateResponse.getResults().size(), equalTo(1));
248+
SimulateDocumentBaseResult result = (SimulateDocumentBaseResult) simulateResponse.getResults().get(0);
249+
assertThat(result.getIngestDocument().getFieldValue("ip-city.city_name", String.class), equalTo("Linköping"));
250+
assertThat(result.getIngestDocument().getFieldValue("ip-asn.organization_name", String.class), equalTo("Bredband2 AB"));
251+
assertThat(result.getIngestDocument().getFieldValue("ip-country.country_name", String.class), equalTo("Sweden"));
252+
});
253+
254+
// Disable downloader:
255+
settings = Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), false);
256+
assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings));
257+
258+
assertBusy(() -> {
259+
for (Path geoipTmpDir : geoipTmpDirs) {
260+
try (Stream<Path> list = Files.list(geoipTmpDir)) {
261+
List<String> files = list.map(Path::toString).collect(Collectors.toList());
262+
assertThat(files, empty());
263+
}
264+
}
265+
});
266+
}
267+
124268
@SuppressForbidden(reason = "Maxmind API requires java.io.File")
125269
private void parseDatabase(Path tempFile) throws IOException {
126270
try (DatabaseReader databaseReader = new DatabaseReader.Builder(tempFile.toFile()).build()) {

0 commit comments

Comments
 (0)