Move pre-configured "keyword" tokenizer to the analysis-common module #24863

Merged · 4 commits · Jun 16, 2017

AnalysisRegistry.java
@@ -466,7 +466,7 @@ public IndexAnalyzers build(IndexSettings indexSettings,
         }
         for (Map.Entry<String, AnalyzerProvider<?>> entry : normalizerProviders.entrySet()) {
             processNormalizerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), normalizers,
-                tokenFilterFactoryFactories, charFilterFactoryFactories);
+                tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories);
         }
         for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
             String key = entry.getKey();
@@ -585,10 +585,11 @@ private void processNormalizerFactory(DeprecationLogger deprecationLogger,
                                           String name,
                                           AnalyzerProvider<?> normalizerFactory,
                                           Map<String, NamedAnalyzer> normalizers,
+                                          TokenizerFactory keywordTokenizerFactory,
                                           Map<String, TokenFilterFactory> tokenFilters,
                                           Map<String, CharFilterFactory> charFilters) {
         if (normalizerFactory instanceof CustomNormalizerProvider) {
-            ((CustomNormalizerProvider) normalizerFactory).build(charFilters, tokenFilters);
+            ((CustomNormalizerProvider) normalizerFactory).build(keywordTokenizerFactory, charFilters, tokenFilters);
         }
         Analyzer normalizerF = normalizerFactory.get();
         if (normalizerF == null) {
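With the KEYWORD constant about to leave core (see the PreBuiltTokenizers hunks below), the registry resolves the keyword tokenizer by name from the factories that installed plugins registered, and threads it through to normalizer construction. A minimal, self-contained sketch of that injection pattern; every name below is an illustrative stand-in, and the null guard is an addition for clarity, not part of the diff:

```java
import java.util.Map;

// Self-contained sketch of the injection pattern this hunk adopts. The
// TokenizerFactory interface is a stand-in for
// org.elasticsearch.index.analysis.TokenizerFactory; nothing here is
// actual Elasticsearch code.
class NormalizerWiringSketch {
    interface TokenizerFactory {}

    static void wireNormalizers(Map<String, TokenizerFactory> tokenizerFactories) {
        // Resolve the pre-configured tokenizer by name; after this PR it is
        // supplied by the analysis-common module instead of a core enum.
        TokenizerFactory keyword = tokenizerFactories.get("keyword");
        if (keyword == null) {
            // Illustrative guard: nothing is registered under "keyword"
            // unless a plugin such as analysis-common provides it.
            throw new IllegalStateException("no pre-configured tokenizer named [keyword]");
        }
        buildNormalizer(keyword); // stands in for processNormalizerFactory(...)
    }

    static void buildNormalizer(TokenizerFactory keywordTokenizerFactory) {
        // Normalizers always tokenize with the keyword tokenizer, so this is
        // the only tokenizer factory they ever receive.
    }
}
```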

CustomNormalizerProvider.java
@@ -21,7 +21,6 @@
 
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -44,7 +43,8 @@ public CustomNormalizerProvider(IndexSettings indexSettings,
         this.analyzerSettings = settings;
     }
 
-    public void build(final Map<String, CharFilterFactory> charFilters, final Map<String, TokenFilterFactory> tokenFilters) {
+    public void build(final TokenizerFactory keywordTokenizerFactory, final Map<String, CharFilterFactory> charFilters,
+                      final Map<String, TokenFilterFactory> tokenFilters) {
         String tokenizerName = analyzerSettings.get("tokenizer");
         if (tokenizerName != null) {
             throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer");
@@ -83,7 +83,7 @@ public void build(final Map<String, CharFilterFactory> charFilters, final Map<St
 
         this.customAnalyzer = new CustomAnalyzer(
             "keyword",
-            PreBuiltTokenizers.KEYWORD.getTokenizerFactory(indexSettings.getIndexVersionCreated()),
+            keywordTokenizerFactory,
             charFiltersList.toArray(new CharFilterFactory[charFiltersList.size()]),
             tokenFilterList.toArray(new TokenFilterFactory[tokenFilterList.size()])
         );
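CustomNormalizerProvider now receives the keyword tokenizer factory as an argument, so it no longer needs core's PreBuiltTokenizers at all. The unchanged code around this hunk also enforces that a normalizer can never configure its own tokenizer. A small illustrative sketch of that contract, reusing the exception message visible above (the class and method here are hypothetical, not Elasticsearch code):

```java
// Hypothetical sketch: a custom normalizer is always tokenized with the
// injected keyword tokenizer, so configuring any tokenizer in its settings
// is rejected up front.
class NormalizerContractSketch {
    static void checkTokenizerSetting(String configuredTokenizer, String normalizerName) {
        if (configuredTokenizer != null) {
            throw new IllegalArgumentException(
                "Custom normalizer [" + normalizerName + "] cannot configure a tokenizer");
        }
        // ...otherwise build the analyzer from the injected
        // keywordTokenizerFactory plus the configured char/token filters.
    }
}
```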

PreBuiltTokenizers.java
@@ -19,7 +19,6 @@
 package org.elasticsearch.indices.analysis;
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
@@ -32,10 +31,7 @@
 import org.apache.lucene.analysis.th.ThaiTokenizer;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.regex.Regex;
-import org.elasticsearch.index.analysis.CustomNormalizerProvider;
-import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
 
 public enum PreBuiltTokenizers {
@@ -68,13 +64,6 @@ protected Tokenizer create(Version version) {
         }
     },
 
-    KEYWORD(CachingStrategy.ONE) {
-        @Override
-        protected Tokenizer create(Version version) {
-            return new KeywordTokenizer();
-        }
-    },
-
     LETTER(CachingStrategy.ONE) {
         @Override
         protected Tokenizer create(Version version) {
@@ -125,50 +114,13 @@ protected TokenFilterFactory getMultiTermComponent(Version version) {
         return null;
     }
 
-    protected final PreBuiltCacheFactory.PreBuiltCache<TokenizerFactory> cache;
     private final CachingStrategy cachingStrategy;
 
     PreBuiltTokenizers(CachingStrategy cachingStrategy) {
         this.cachingStrategy = cachingStrategy;
-        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
     }
 
     public CachingStrategy getCachingStrategy() {
         return cachingStrategy;
     }
-
-    private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {}
-
-    /**
-     * Old style resolution for {@link TokenizerFactory}. Exists entirely to keep
-     * {@link CustomNormalizerProvider#build(java.util.Map, java.util.Map)} working during the migration.
-     */
-    public synchronized TokenizerFactory getTokenizerFactory(final Version version) {
-        TokenizerFactory tokenizerFactory = cache.get(version);
-        if (tokenizerFactory == null) {
-            if (getMultiTermComponent(version) != null) {
-                tokenizerFactory = new MultiTermAwareTokenizerFactory() {
-                    @Override
-                    public Tokenizer create() {
-                        return PreBuiltTokenizers.this.create(version);
-                    }
-
-                    @Override
-                    public Object getMultiTermComponent() {
-                        return PreBuiltTokenizers.this.getMultiTermComponent(version);
-                    }
-                };
-            } else {
-                tokenizerFactory = new TokenizerFactory() {
-                    @Override
-                    public Tokenizer create() {
-                        return PreBuiltTokenizers.this.create(version);
-                    }
-                };
-            }
-            cache.put(version, tokenizerFactory);
-        }
-
-        return tokenizerFactory;
-    }
 }
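Deleting KEYWORD also lets the enum drop its per-version TokenizerFactory cache and the MultiTermAwareTokenizerFactory bridge. The replacement registration in the next file, PreConfiguredTokenizer.singleton(...), expresses the same CachingStrategy.ONE semantics: build the tokenizer machinery once and share it across all index versions. A toy, self-contained sketch of that caching behavior (an assumption-level illustration, not the PreConfiguredTokenizer implementation):

```java
import java.util.function.Supplier;

// Toy model of "singleton" caching: the supplier runs once and the same
// instance is handed out for every index version thereafter.
class SingletonCacheSketch<T> {
    private final Supplier<T> create;
    private T cached;

    SingletonCacheSketch(Supplier<T> create) {
        this.create = create;
    }

    synchronized T get() {
        if (cached == null) {
            cached = create.get(); // built on first use, shared afterwards
        }
        return cached;
    }
}
```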

CommonAnalysisPlugin.java
@@ -32,6 +32,7 @@
 import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
 import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.UpperCaseFilter;
@@ -207,6 +208,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
     @Override
     public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
         List<PreConfiguredTokenizer> tokenizers = new ArrayList<>();
+        tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new, null));
         tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", LowerCaseTokenizer::new, () -> new TokenFilterFactory() {
             @Override
             public String name() {
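For plugin authors, the hunk above is also the template for exposing any pre-configured tokenizer. A minimal sketch of a standalone plugin doing the same registration; the plugin class name is hypothetical, and the imports are assumptions about where these types lived at the time of this PR:

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Hypothetical plugin registering a pre-configured "keyword" tokenizer,
// modeled directly on the hunk above.
public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
    @Override
    public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
        List<PreConfiguredTokenizer> tokenizers = new ArrayList<>();
        // singleton(name, create, multiTermComponent): one shared instance;
        // null means no multi-term-aware variant is supplied.
        tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new, null));
        return tokenizers;
    }
}
```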

CommonAnalysisFactoryTests.java
@@ -136,6 +136,7 @@ protected Map<String, Class<?>> getPreConfiguredTokenFilters() {
     @Override
     protected Map<String, Class<?>> getPreConfiguredTokenizers() {
         Map<String, Class<?>> filters = new TreeMap<>(super.getPreConfiguredTokenizers());
+        filters.put("keyword", null);
         filters.put("lowercase", null);
         return filters;
     }
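In the base test's map each key is a pre-configured tokenizer name and the value is the Lucene factory class the test checks it against; a null value appears to mean the entry is acknowledged without a Lucene counterpart to verify. Adding "keyword" here keeps the coverage check passing after the move. A standalone sketch of the same override shape (the class name is illustrative; the base class is assumed to be AnalysisFactoryTestCase):

```java
import java.util.Map;
import java.util.TreeMap;

// Illustrative, standalone version of the override above: copy the
// superclass map, then record each tokenizer this module now provides.
class PreConfiguredTokenizerCoverageSketch {
    static Map<String, Class<?>> preConfiguredTokenizers(Map<String, Class<?>> fromSuper) {
        Map<String, Class<?>> tokenizers = new TreeMap<>(fromSuper);
        tokenizers.put("keyword", null);   // moved to analysis-common in this PR
        tokenizers.put("lowercase", null); // already provided by this module
        return tokenizers;
    }
}
```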