Make Multiplexer inherit filter chains analysis mode (#50662)
Currently, if an updateable synonym filter is included in a multiplexer filter,
it is not reloaded via the _reload_search_analyzers API because the multiplexer
itself doesn't pass on the analysis mode of the filters it contains, so it is
not recognized as "updateable" itself. Instead we can check and merge the
AnalysisMode settings of all filters in the multiplexer and use the resulting
mode (e.g. search-time only) for the multiplexer itself, thus making any synonym
filters contained in it reloadable. This, of course, also restricts any analyzer
that uses the multiplexer to search-time use only.

Closes #50554
Christoph Büscher committed Jan 8, 2020
1 parent 78c9eee commit b1b4282
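
As a rough illustration of the merge rule this change relies on (a sketch added for this page, not part of the commit; the class name is made up, and the merge semantics follow the AnalysisMode Javadoc visible in the diff below):

    import org.elasticsearch.index.analysis.AnalysisMode;

    class AnalysisModeMergeSketch {
        // An updateable synonym filter is SEARCH_TIME-only. Folding its mode into the
        // multiplexer's own mode therefore makes the whole multiplexer, and any
        // analyzer that uses it, search-time only.
        static AnalysisMode modeForMultiplexerWrappingUpdateableSynonyms() {
            AnalysisMode mode = AnalysisMode.ALL;        // start with the least restrictive mode
            mode = mode.merge(AnalysisMode.SEARCH_TIME); // contained updateable synonym filter
            // Note: AnalysisMode.INDEX_TIME.merge(AnalysisMode.SEARCH_TIME) would throw
            // an IllegalStateException, since no single mode can satisfy both.
            return mode;                                 // SEARCH_TIME
        }
    }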
Showing 3 changed files with 135 additions and 42 deletions.
@@ -32,6 +32,7 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -84,12 +85,15 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory token
if (preserveOriginal) {
filters.add(IDENTITY_FILTER);
}
// also merge and transfer token filter analysis modes with analyzer
AnalysisMode mode = AnalysisMode.ALL;
for (String filter : filterNames) {
String[] parts = Strings.tokenizeToStringArray(filter, ",");
if (parts.length == 1) {
TokenFilterFactory factory = resolveFilterFactory(allFilters, parts[0]);
factory = factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, previousTokenFilters, allFilters);
filters.add(factory);
mode = mode.merge(factory.getAnalysisMode());
} else {
List<TokenFilterFactory> existingChain = new ArrayList<>(previousTokenFilters);
List<TokenFilterFactory> chain = new ArrayList<>();
@@ -98,10 +102,12 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory token
factory = factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, existingChain, allFilters);
chain.add(factory);
existingChain.add(factory);
mode = mode.merge(factory.getAnalysisMode());
}
filters.add(chainFilters(filter, chain));
}
}
final AnalysisMode analysisMode = mode;

return new TokenFilterFactory() {
@Override
@@ -133,6 +139,11 @@ public TokenFilterFactory getSynonymFilter() {
+ "] cannot be used to parse synonyms unless [preserve_original] is [true]");
}
}

@Override
public AnalysisMode getAnalysisMode() {
return analysisMode;
}
};
}

@@ -78,5 +78,5 @@ public String getReadableName() {
* <li>INDEX_TIME.merge(SEARCH_TIME) throws an {@link IllegalStateException}</li>
* </ul>
*/
abstract AnalysisMode merge(AnalysisMode other);
public abstract AnalysisMode merge(AnalysisMode other);
}
@@ -43,35 +43,25 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}
Path synonymsFile = setupSynonymsFile(synonymsFileName, "foo, baz");

final String indexName = "test";
final String synonymAnalyzerName = "synonym_analyzer";
final String synonymGraphAnalyzerName = "synonym_graph_analyzer";
assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.synonym_graph_filter.type", "synonym_graph")
.put("analysis.filter.synonym_graph_filter.updateable", "true")
.put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName))
assertAcked(client().admin().indices().prepareCreate(indexName)
.setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.analyzer." + synonymGraphAnalyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + synonymGraphAnalyzerName + ".filter", "lowercase", "synonym_graph_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.synonym_graph_filter.type", "synonym_graph")
.put("analysis.filter.synonym_graph_filter.updateable", "true")
.put("analysis.filter.synonym_graph_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName));

client().prepareIndex(indexName, "_doc", "1").setSource("field", "Foo").get();
@@ -84,8 +74,7 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException {

{
for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) {
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName)
.get();
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get();
assertEquals(2, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
@@ -109,8 +98,7 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException {

{
for (String analyzerName : new String[] { synonymAnalyzerName, synonymGraphAnalyzerName }) {
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName)
.get();
Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(analyzerName).get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
@@ -126,8 +114,69 @@ public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
assertHitCount(response, 1L);
}

public void testSynonymsInMultiplexerUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path synonymsFile = setupSynonymsFile(synonymsFileName, "foo, baz");

final String indexName = "test";
final String synonymAnalyzerName = "synonym_in_multiplexer_analyzer";
assertAcked(client().admin().indices().prepareCreate(indexName)
.setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + synonymAnalyzerName + ".tokenizer", "whitespace")
.putList("analysis.analyzer." + synonymAnalyzerName + ".filter", "my_multiplexer")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.my_multiplexer.type", "multiplexer")
.putList("analysis.filter.my_multiplexer.filters", "synonym_filter"))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=" + synonymAnalyzerName));

client().prepareIndex(indexName, "_doc", "1").setSource("field", "foo").get();
assertNoFailures(client().admin().indices().prepareRefresh(indexName).execute().actionGet());

SearchResponse response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);

Response analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(synonymAnalyzerName).get();
assertEquals(2, analyzeResponse.getTokens().size());
final Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
ReloadAnalyzersResponse reloadResponse = client().execute(ReloadAnalyzerAction.INSTANCE, new ReloadAnalyzersRequest(indexName))
.actionGet();
assertNoFailures(reloadResponse);
Set<String> reloadedAnalyzers = reloadResponse.getReloadDetails().get(indexName).getReloadedAnalyzers();
assertEquals(1, reloadedAnalyzers.size());
assertTrue(reloadedAnalyzers.contains(synonymAnalyzerName));

analyzeResponse = client().admin().indices().prepareAnalyze(indexName, "foo").setAnalyzer(synonymAnalyzerName).get();
assertEquals(3, analyzeResponse.getTokens().size());
tokens.clear();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}

public void testUpdateableSynonymsRejectedAtIndexTime() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
setupSynonymsFile(synonymsFileName, "foo, baz");
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
@@ -143,20 +192,53 @@ public void testUpdateableSynonymsRejectedAtIndexTime() throws FileNotFoundExcep

final String indexName = "test";
final String analyzerName = "my_synonym_analyzer";

MapperException ex = expectThrows(MapperException.class, () -> client().admin().indices().prepareCreate(indexName)
.setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName))
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "standard")
.putList("analysis.analyzer." + analyzerName + ".filter", "lowercase", "synonym_filter")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get());

assertEquals(
"Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] "
+ "contains filters [synonym_filter] that are not allowed to run in all mode.",
ex.getMessage());
assertEquals("Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] "
+ "contains filters [synonym_filter] that are not allowed to run in all mode.", ex.getMessage());

// same for synonym filters in multiplexer chain
ex = expectThrows(MapperException.class,
() -> client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer." + analyzerName + ".tokenizer", "whitespace")
.putList("analysis.analyzer." + analyzerName + ".filter", "my_multiplexer")
.put("analysis.filter.synonym_filter.type", "synonym")
.put("analysis.filter.synonym_filter.updateable", "true")
.put("analysis.filter.synonym_filter.synonyms_path", synonymsFileName)
.put("analysis.filter.my_multiplexer.type", "multiplexer")
.putList("analysis.filter.my_multiplexer.filters", "synonym_filter"))
.addMapping("_doc", "field", "type=text,analyzer=" + analyzerName).get());

assertEquals("Failed to parse mapping [_doc]: analyzer [my_synonym_analyzer] "
+ "contains filters [my_multiplexer] that are not allowed to run in all mode.", ex.getMessage());
}
}

private Path setupSynonymsFile(String synonymsFileName, String content) throws IOException {
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println(content);
}
return synonymsFile;
}

}
