Skip to content

Commit 4c2ce0a

Browse files
author
Christoph Büscher
committed
Remove nGram and edgeNGram token filter names (elastic#38911)
In elastic#30209 we deprecated the camel case `nGram` filter name in favour of `ngram` and did the same for `edgeNGram` and `edge_ngram`. Using these names has been deprecated since 6.4 and has been issuing deprecation warnings since then. I think we can remove these filters in 8.0. In a backport of this PR I would change what was a deprecation warning from 6.4 to an error starting with new indices created in 7.0.
1 parent 31163f0 commit 4c2ce0a

File tree

8 files changed

+81
-21
lines changed

8 files changed

+81
-21
lines changed

docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[[analysis-edgengram-tokenfilter]]
22
=== Edge NGram Token Filter
33

4-
A token filter of type `edgeNGram`.
4+
A token filter of type `edge_ngram`.
55

6-
The following are settings that can be set for a `edgeNGram` token
6+
The following are settings that can be set for an `edge_ngram` token
77
filter type:
88

99
[cols="<,<",options="header",]

docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[[analysis-ngram-tokenfilter]]
22
=== NGram Token Filter
33

4-
A token filter of type `nGram`.
4+
A token filter of type `ngram`.
55

6-
The following are settings that can be set for a `nGram` token filter
6+
The following are settings that can be set for a `ngram` token filter
77
type:
88

99
[cols="<,<",options="header",]

docs/reference/migration/migrate_7_0/analysis.asciidoc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,12 @@ The `standard` token filter has been removed because it doesn't change anything
3838
The `standard_html_strip` analyzer has been deprecated, and should be replaced
3939
with a combination of the `standard` tokenizer and `html_strip` char_filter.
4040
Indexes created using this analyzer will still be readable in elasticsearch 7.0,
41-
but it will not be possible to create new indexes using it.
41+
but it will not be possible to create new indexes using it.
42+
43+
[float]
44+
==== The deprecated `nGram` and `edgeNGram` token filter names cannot be used on new indices
45+
46+
The `nGram` and `edgeNGram` token filter names have been deprecated since version 6.4.
47+
Indexes created using these token filters will still be readable in elasticsearch 7.0.
48+
Using them on new indices from version 7.0.0 on however will throw an error when indexing
49+
or analyzing documents. Both names should be replaced by `ngram` and `edge_ngram` respectively.

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,11 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
415415
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input ->
416416
new EdgeNGramTokenFilter(input, 1)));
417417
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> {
418-
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
418+
if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
419+
throw new IllegalArgumentException(
420+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
421+
+ "Please change the filter name to [edge_ngram] instead.");
422+
} else if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
419423
deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
420424
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
421425
+ "Please change the filter name to [edge_ngram] instead.");
@@ -439,7 +443,10 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
439443
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
440444
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
441445
filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> {
442-
if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
446+
if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
447+
throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
448+
+ "Please change the filter name to [ngram] instead.");
449+
} else if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
443450
deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
444451
"The [nGram] token filter name is deprecated and will be removed in a future version. "
445452
+ "Please change the filter name to [ngram] instead.");

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class CommonAnalysisPluginTests extends ESTestCase {
4545
*/
4646
public void testNGramDeprecationWarning() throws IOException {
4747
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
48-
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
48+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.V_6_7_0))
4949
.build();
5050

5151
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -61,6 +61,28 @@ public void testNGramDeprecationWarning() throws IOException {
6161
}
6262
}
6363

64+
/**
65+
* Check that the deprecated name "nGram" throws an error for indices created since 7.0.0
66+
*/
67+
public void testNGramDeprecationError() throws IOException {
68+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
69+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
70+
.build();
71+
72+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
73+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
74+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
75+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
76+
Tokenizer tokenizer = new MockTokenizer();
77+
tokenizer.setReader(new StringReader("foo bar"));
78+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
79+
assertEquals(
80+
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
81+
+ " name to [ngram] instead.",
82+
ex.getMessage());
83+
}
84+
}
85+
6486
/**
6587
* Check that the deprecated name "nGram" does NOT issue a deprecation warning for indices created before 6.4.0
6688
*/
@@ -80,12 +102,13 @@ public void testNGramNoDeprecationWarningPre6_4() throws IOException {
80102
}
81103
}
82104

105+
83106
/**
84107
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.4.0
85108
*/
86109
public void testEdgeNGramDeprecationWarning() throws IOException {
87110
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
88-
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
111+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.V_6_7_0))
89112
.build();
90113

91114
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -101,6 +124,28 @@ public void testEdgeNGramDeprecationWarning() throws IOException {
101124
}
102125
}
103126

127+
/**
128+
* Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
129+
*/
130+
public void testEdgeNGramDeprecationError() throws IOException {
131+
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
132+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
133+
.build();
134+
135+
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
136+
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
137+
Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
138+
TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
139+
Tokenizer tokenizer = new MockTokenizer();
140+
tokenizer.setReader(new StringReader("foo bar"));
141+
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
142+
assertEquals(
143+
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
144+
+ " name to [edge_ngram] instead.",
145+
ex.getMessage());
146+
}
147+
}
148+
104149
/**
105150
* Check that the deprecated name "edgeNGram" does NOT issue a deprecation warning for indices created before 6.4.0
106151
*/

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ public void testNgramHighlightingWithBrokenPositions() throws IOException {
8181
.put("analysis.tokenizer.autocomplete.max_gram", 20)
8282
.put("analysis.tokenizer.autocomplete.min_gram", 1)
8383
.put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
84-
.put("analysis.tokenizer.autocomplete.type", "nGram")
84+
.put("analysis.tokenizer.autocomplete.type", "ngram")
8585
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
8686
.putList("analysis.filter.wordDelimiter.type_table",
8787
"& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",

modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@
2323
- match: { detail.tokenizer.tokens.0.token: Foo Bar! }
2424

2525
---
26-
"nGram":
26+
"ngram":
2727
- do:
2828
indices.analyze:
2929
body:
3030
text: good
3131
explain: true
3232
tokenizer:
33-
type: nGram
33+
type: ngram
3434
min_gram: 2
3535
max_gram: 2
3636
- length: { detail.tokenizer.tokens: 3 }
@@ -40,7 +40,7 @@
4040
- match: { detail.tokenizer.tokens.2.token: od }
4141

4242
---
43-
"nGram_exception":
43+
"ngram_exception":
4444
- skip:
4545
version: " - 6.99.99"
4646
reason: only starting from version 7.x this throws an error
@@ -51,7 +51,7 @@
5151
text: good
5252
explain: true
5353
tokenizer:
54-
type: nGram
54+
type: ngram
5555
min_gram: 2
5656
max_gram: 4
5757
---
@@ -133,7 +133,7 @@
133133
text: "foobar"
134134
explain: true
135135
tokenizer:
136-
type: nGram
136+
type: ngram
137137
min_gram: 3
138138
max_gram: 3
139139
- length: { detail.tokenizer.tokens: 4 }
@@ -162,9 +162,9 @@
162162
body:
163163
text: "foo"
164164
explain: true
165-
tokenizer: nGram
165+
tokenizer: ngram
166166
- length: { detail.tokenizer.tokens: 5 }
167-
- match: { detail.tokenizer.name: nGram }
167+
- match: { detail.tokenizer.name: ngram }
168168
- match: { detail.tokenizer.tokens.0.token: f }
169169
- match: { detail.tokenizer.tokens.1.token: fo }
170170
- match: { detail.tokenizer.tokens.2.token: o }
@@ -194,7 +194,7 @@
194194
text: "foo"
195195
explain: true
196196
tokenizer:
197-
type: edgeNGram
197+
type: edge_ngram
198198
min_gram: 1
199199
max_gram: 3
200200
- length: { detail.tokenizer.tokens: 3 }
@@ -219,9 +219,9 @@
219219
body:
220220
text: "foo"
221221
explain: true
222-
tokenizer: edgeNGram
222+
tokenizer: edge_ngram
223223
- length: { detail.tokenizer.tokens: 2 }
224-
- match: { detail.tokenizer.name: edgeNGram }
224+
- match: { detail.tokenizer.name: edge_ngram }
225225
- match: { detail.tokenizer.tokens.0.token: f }
226226
- match: { detail.tokenizer.tokens.1.token: fo }
227227

modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
analysis:
7777
tokenizer:
7878
trigram:
79-
type: nGram
79+
type: ngram
8080
min_gram: 3
8181
max_gram: 3
8282
filter:

0 commit comments

Comments
 (0)