From b1b42822730c97953e54666a4a45e8da64c95090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 8 Jan 2020 18:25:00 +0100 Subject: [PATCH] Make Multiplexer inherit filter chains analysis mode (#50662) Currently, if an updateable synonym filter is included in a multiplexer filter, it is not reloaded via the _reload_search_analyzers API because the multiplexer itself doesn't pass on the analysis mode of the filters it contains, so it's not recognized as "updateable" in itself. Instead we can check and merge the AnalysisMode settings of all filters in the multiplexer and use the resulting mode (e.g. search-time only) for the multiplexer itself, thus making any synonym filters contained in it reloadable. This, of course, will also make the analyzers using the multiplexer be usable at search-time only. Closes #50554 --- .../common/MultiplexerTokenFilterFactory.java | 11 ++ .../index/analysis/AnalysisMode.java | 2 +- .../action/ReloadSynonymAnalyzerTests.java | 164 +++++++++++++----- 3 files changed, 135 insertions(+), 42 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java index 9c53fc1f63e3b..477675a188811 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java @@ -32,6 +32,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.AnalysisMode; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; @@ -84,12 +85,15 @@ public 
TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory token if (preserveOriginal) { filters.add(IDENTITY_FILTER); } + // also merge and transfer token filter analysis modes with analyzer + AnalysisMode mode = AnalysisMode.ALL; for (String filter : filterNames) { String[] parts = Strings.tokenizeToStringArray(filter, ","); if (parts.length == 1) { TokenFilterFactory factory = resolveFilterFactory(allFilters, parts[0]); factory = factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, previousTokenFilters, allFilters); filters.add(factory); + mode = mode.merge(factory.getAnalysisMode()); } else { List existingChain = new ArrayList<>(previousTokenFilters); List chain = new ArrayList<>(); @@ -98,10 +102,12 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory token factory = factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, existingChain, allFilters); chain.add(factory); existingChain.add(factory); + mode = mode.merge(factory.getAnalysisMode()); } filters.add(chainFilters(filter, chain)); } } + final AnalysisMode analysisMode = mode; return new TokenFilterFactory() { @Override @@ -133,6 +139,11 @@ public TokenFilterFactory getSynonymFilter() { + "] cannot be used to parse synonyms unless [preserve_original] is [true]"); } } + + @Override + public AnalysisMode getAnalysisMode() { + return analysisMode; + } }; } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisMode.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisMode.java index ea9e1e0c6aa7f..5b781ea7a2589 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisMode.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisMode.java @@ -78,5 +78,5 @@ public String getReadableName() { *
  • INDEX_TIME.merge(SEARCH_TIME) throws an {@link IllegalStateException}
  • * */ - abstract AnalysisMode merge(AnalysisMode other); + public abstract AnalysisMode merge(AnalysisMode other); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/ReloadSynonymAnalyzerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/ReloadSynonymAnalyzerTests.java index 750b76711524a..c70554f14f348 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/ReloadSynonymAnalyzerTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/ReloadSynonymAnalyzerTests.java @@ -43,35 +43,25 @@ protected Collection> getPlugins() { public void testSynonymsUpdateable() throws FileNotFoundException, IOException { String synonymsFileName = "synonyms.txt"; - Path configDir = node().getEnvironment().configFile(); - if (Files.exists(configDir) == false) { - Files.createDirectory(configDir); - } - Path synonymsFile = configDir.resolve(synonymsFileName); - if (Files.exists(synonymsFile) == false) { - Files.createFile(synonymsFile); - } - try (PrintWriter out = new PrintWriter( - new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) { - out.println("foo, baz"); - } + Path synonymsFile = setupSynonymsFile(synonymsFileName, "foo, baz"); final String indexName = "test"; final String synonymAnalyzerName = "synonym_analyzer"; final String synonymGraphAnalyzerName = "synonym_graph_analyzer"; - assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder() - .put("index.number_of_shards", 5) - .put("index.number_of_replicas", 0) - .put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard") - .putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter") - .put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard") - .putList("analysis.analyzer." 
+ synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter") - .put("analysis.filter.synonym_filter.type", "synonym") - .put("analysis.filter.synonym_filter.updateable", "true") - .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName) - .put("analysis.filter.synonym_graph_filter.type", "synonym_graph") - .put("analysis.filter.synonym_graph_filter.updateable", "true") - .put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName)) + assertAcked(client().admin().indices().prepareCreate(indexName) + .setSettings(Settings.builder() + .put("index.number_of_shards", 5) + .put("index.number_of_replicas", 0) + .put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard") + .putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter") + .put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard") + .putList("analysis.analyzer." + synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter") + .put("analysis.filter.synonym_filter.type", "synonym") + .put("analysis.filter.synonym_filter.updateable", "true") + .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName) + .put("analysis.filter.synonym_graph_filter.type", "synonym_graph") + .put("analysis.filter.synonym_graph_filter.updateable", "true") + .put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName)) .addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName)); client().prepareIndex(indexName, "_doc", "1").setSource("field", "Foo").get(); @@ -84,8 +74,7 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException { { for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) { - Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName) - .get(); + Response analyzeResponse = 
client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get(); assertEquals(2, analyzeResponse.getTokens().size()); Set tokens = new HashSet<>(); analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t)); @@ -109,8 +98,7 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException { { for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) { - Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName) - .get(); + Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get(); assertEquals(3, analyzeResponse.getTokens().size()); Set tokens = new HashSet<>(); analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t)); @@ -126,8 +114,69 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException { assertHitCount(response, 1L); } + public void testSynonymsInMultiplexerUpdateable() throws FileNotFoundException, IOException { + String synonymsFileName = "synonyms.txt"; + Path synonymsFile = setupSynonymsFile(synonymsFileName, "foo, baz"); + + final String indexName = "test"; + final String synonymAnalyzerName = "synonym_in_multiplexer_analyzer"; + assertAcked(client().admin().indices().prepareCreate(indexName) + .setSettings(Settings.builder() + .put("index.number_of_shards", 5) + .put("index.number_of_replicas", 0) + .put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "whitespace") + .putList("analysis.analyzer." 
+ synonymAnalyzerName + ".filter", "my_multiplexer") + .put("analysis.filter.synonym_filter.type", "synonym") + .put("analysis.filter.synonym_filter.updateable", "true") + .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName) + .put("analysis.filter.my_multiplexer.type", "multiplexer") + .putList("analysis.filter.my_multiplexer.filters", "synonym_filter")) + .addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName)); + + client().prepareIndex(indexName, "_doc", "1").setSource("field", "foo").get(); + assertNoFailures(client().admin().indices().prepareRefresh(indexName).execute().actionGet()); + + SearchResponse response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get(); + assertHitCount(response, 1L); + response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get(); + assertHitCount(response, 0L); + + Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(synonymAnalyzerName).get(); + assertEquals(2, analyzeResponse.getTokens().size()); + final Set tokens = new HashSet<>(); + analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t)); + assertTrue(tokens.contains("foo")); + assertTrue(tokens.contains("baz")); + + // now update synonyms file and trigger reloading + try (PrintWriter out = new PrintWriter( + new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) { + out.println("foo, baz, buzz"); + } + ReloadAnalyzersResponse reloadResponse = client().execute(ReloadAnalyzerAction.INSTANCE, new ReloadAnalyzersRequest(indexName)) + .actionGet(); + assertNoFailures(reloadResponse); + Set reloadedAnalyzers = reloadResponse.getReloadDetails().get(indexName).getReloadedAnalyzers(); + assertEquals(1, reloadedAnalyzers.size()); + assertTrue(reloadedAnalyzers.contains(synonymAnalyzerName)); + + 
analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(synonymAnalyzerName).get(); + assertEquals(3, analyzeResponse.getTokens().size()); + tokens.clear(); + analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t)); + assertTrue(tokens.contains("foo")); + assertTrue(tokens.contains("baz")); + assertTrue(tokens.contains("buzz")); + + response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get(); + assertHitCount(response, 1L); + response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get(); + assertHitCount(response, 1L); + } + public void testUpdateableSynonymsRejectedAtIndexTime() throws FileNotFoundException, IOException { String synonymsFileName = "synonyms.txt"; + setupSynonymsFile(synonymsFileName, "foo, baz"); Path configDir = node().getEnvironment().configFile(); if (Files.exists(configDir) == false) { Files.createDirectory(configDir); @@ -143,20 +192,53 @@ public void testUpdateableSynonymsRejectedAtIndexTime() throws FileNotFoundExcep final String indexName = "test"; final String analyzerName = "my_synonym_analyzer"; + MapperException ex = expectThrows(MapperException.class, () -> client().admin().indices().prepareCreate(indexName) .setSettings(Settings.builder() - .put("index.number_of_shards", 5) - .put("index.number_of_replicas", 0) - .put("analysis.analyzer." + analyzerName + ".tokenizer", "standard") - .putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "synonym_filter") - .put("analysis.filter.synonym_filter.type", "synonym") - .put("analysis.filter.synonym_filter.updateable", "true") - .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)) + .put("index.number_of_shards", 5) + .put("index.number_of_replicas", 0) + .put("analysis.analyzer." + analyzerName + ".tokenizer", "standard") + .putList("analysis.analyzer." 
+ analyzerName + ".filter", "lowercase", "synonym_filter") + .put("analysis.filter.synonym_filter.type", "synonym") + .put("analysis.filter.synonym_filter.updateable", "true") + .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)) .addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get()); - assertEquals( - "Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] " - + "contains filters [synonym_filter] that are not allowed to run in all mode.", - ex.getMessage()); + assertEquals("Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] " + + "contains filters [synonym_filter] that are not allowed to run in all mode.", ex.getMessage()); + + // same for synonym filters in multiplexer chain + ex = expectThrows(MapperException.class, + () -> client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder() + .put("index.number_of_shards", 5) + .put("index.number_of_replicas", 0) + .put("analysis.analyzer." + analyzerName + ".tokenizer", "whitespace") + .putList("analysis.analyzer." 
+ analyzerName + ".filter", "my_multiplexer") + .put("analysis.filter.synonym_filter.type", "synonym") + .put("analysis.filter.synonym_filter.updateable", "true") + .put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName) + .put("analysis.filter.my_multiplexer.type", "multiplexer") + .putList("analysis.filter.my_multiplexer.filters", "synonym_filter")) + .addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get()); + + assertEquals("Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] " + + "contains filters [my_multiplexer] that are not allowed to run in all mode.", ex.getMessage()); } -} \ No newline at end of file + + private Path setupSynonymsFile(String synonymsFileName, String content) throws IOException { + Path configDir = node().getEnvironment().configFile(); + if (Files.exists(configDir) == false) { + Files.createDirectory(configDir); + } + Path synonymsFile = configDir.resolve(synonymsFileName); + if (Files.exists(synonymsFile) == false) { + Files.createFile(synonymsFile); + } + try (PrintWriter out = new PrintWriter( + new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) { + out.println(content); + } + return synonymsFile; + } + +}