From 41545324ff55fa89f5852bf9dc5a8da19d43f633 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 24 May 2017 10:49:18 -0400 Subject: [PATCH 1/3] Repipe keyword analyzer --- .../org/elasticsearch/index/analysis/AnalysisRegistry.java | 6 +++--- .../index/analysis/CustomNormalizerProvider.java | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 5d099267c79e8..2d07a5c4bb284 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -36,7 +36,6 @@ import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.indices.analysis.PreBuiltAnalyzers; import org.elasticsearch.indices.analysis.PreBuiltCharFilters; -import org.elasticsearch.indices.analysis.PreBuiltTokenizers; import java.io.Closeable; import java.io.IOException; @@ -475,7 +474,7 @@ public IndexAnalyzers build(IndexSettings indexSettings, } for (Map.Entry> entry : normalizerProviders.entrySet()) { processNormalizerFactory(deprecationLogger, indexSettings, entry.getKey(), entry.getValue(), normalizers, - tokenFilterFactoryFactories, charFilterFactoryFactories); + tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories); } for (Map.Entry entry : analyzerAliases.entrySet()) { String key = entry.getKey(); @@ -594,10 +593,11 @@ private void processNormalizerFactory(DeprecationLogger deprecationLogger, String name, AnalyzerProvider normalizerFactory, Map normalizers, + TokenizerFactory keywordTokenizerFactory, Map tokenFilters, Map charFilters) { if (normalizerFactory instanceof CustomNormalizerProvider) { - ((CustomNormalizerProvider) normalizerFactory).build(charFilters, tokenFilters); + ((CustomNormalizerProvider) normalizerFactory).build(keywordTokenizerFactory, charFilters, tokenFilters); } Analyzer normalizerF = normalizerFactory.get(); if (normalizerF == null) { diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java index 2fcc987df6aa3..5c44ab2108bef 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java @@ -21,7 +21,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.indices.analysis.PreBuiltTokenizers; import java.util.ArrayList; import java.util.List; @@ -44,7 +43,8 @@ public CustomNormalizerProvider(IndexSettings indexSettings, this.analyzerSettings = settings; } - public void build(final Map charFilters, final Map tokenFilters) { + public void build(final TokenizerFactory keywordTokenizerFactory, final Map charFilters, + final Map tokenFilters) { String tokenizerName = analyzerSettings.get("tokenizer"); if (tokenizerName != null) { throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer"); @@ -82,7 +82,7 @@ public void build(final Map charFilters, final Map Date: Wed, 24 May 2017 10:52:29 -0400 Subject: [PATCH 2/3] Drop now unused --- .../indices/analysis/PreBuiltTokenizers.java | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java index 52e7ff6c9c4fa..f7f0427c5e260 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java @@ -32,14 +32,9 @@ import org.apache.lucene.analysis.th.ThaiTokenizer; import org.elasticsearch.Version; import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.index.analysis.CustomNormalizerProvider; -import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; -import java.util.Locale; - public enum PreBuiltTokenizers { STANDARD(CachingStrategy.ONE) { @@ -127,61 +122,13 @@ protected TokenFilterFactory getMultiTermComponent(Version version) { return null; } - protected final PreBuiltCacheFactory.PreBuiltCache cache; private final CachingStrategy cachingStrategy; PreBuiltTokenizers(CachingStrategy cachingStrategy) { this.cachingStrategy = cachingStrategy; - cache = PreBuiltCacheFactory.getCache(cachingStrategy); } public CachingStrategy getCachingStrategy() { return cachingStrategy; } - - private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {} - - /** - * Old style resolution for {@link TokenizerFactory}. Exists entirely to keep - * {@link CustomNormalizerProvider#build(java.util.Map, java.util.Map)} working during the migration. - */ - public synchronized TokenizerFactory getTokenizerFactory(final Version version) { - TokenizerFactory tokenizerFactory = cache.get(version); - if (tokenizerFactory == null) { - final String finalName = name().toLowerCase(Locale.ROOT); - if (getMultiTermComponent(version) != null) { - tokenizerFactory = new MultiTermAwareTokenizerFactory() { - @Override - public String name() { - return finalName; - } - - @Override - public Tokenizer create() { - return PreBuiltTokenizers.this.create(version); - } - - @Override - public Object getMultiTermComponent() { - return PreBuiltTokenizers.this.getMultiTermComponent(version); - } - }; - } else { - tokenizerFactory = new TokenizerFactory() { - @Override - public String name() { - return finalName; - } - - @Override - public Tokenizer create() { - return PreBuiltTokenizers.this.create(version); - } - }; - } - cache.put(version, tokenizerFactory); - } - - return tokenizerFactory; - } } From 40dee98c881648e3570fd026608b38f4b259e5a4 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 24 May 2017 11:29:54 -0400 Subject: [PATCH 3/3] Move pre-configured keyword tokenizer to analysis common --- .../elasticsearch/indices/analysis/PreBuiltTokenizers.java | 7 ------- .../analysis/common/CommonAnalysisPlugin.java | 2 ++ .../analysis/common/CommonAnalysisFactoryTests.java | 3 ++- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java index f7f0427c5e260..b23289bab40c3 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java @@ -65,13 +65,6 @@ protected Tokenizer create(Version version) { } }, - KEYWORD(CachingStrategy.ONE) { - @Override - protected Tokenizer create(Version version) { - return new KeywordTokenizer(); - } - }, - LETTER(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index fcca4f7eddff0..bb880e74ac5d5 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -31,6 +31,7 @@ import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter; import org.apache.lucene.analysis.commongrams.CommonGramsFilter; import org.apache.lucene.analysis.core.DecimalDigitFilter; +import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.LowerCaseTokenizer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.UpperCaseFilter; @@ -182,6 +183,7 @@ public List getPreConfiguredTokenFilters() { @Override public List getPreConfiguredTokenizers() { List tokenizers = new ArrayList<>(); + tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new, null)); tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", LowerCaseTokenizer::new, () -> new TokenFilterFactory() { @Override public String name() { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index 3ce7fd1d301b9..bbe497368b67a 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -120,7 +120,8 @@ protected Map> getPreConfiguredTokenFilters() { @Override protected Map> getPreConfiguredTokenizers() { Map> filters = new TreeMap<>(super.getPreConfiguredTokenFilters()); - + filters.put("keyword", null); + filters.put("lowercase", null); return filters; }