Remove nGram and edgeNGram token filter names (elastic#38911)

Christoph Büscher · Christoph Büscher · commit 4c2ce0a80075 · 2019-02-18T20:42:30.000+01:00
In elastic#30209 we deprecated the camel case `nGram` filter name in favour of `ngram` and did the same for `edgeNGram` and `edge_ngram`. Using these names has been deprecated since 6.4 and has been issuing deprecation warnings since then. I think we can remove these filters in 8.0. In a backport of this PR I would change what has been a deprecation warning since 6.4 into an error for new indices created from 7.0 on.
diff --git a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc
@@ -1,9 +1,9 @@
 [[analysis-edgengram-tokenfilter]]
 === Edge NGram Token Filter
 
-A token filter of type `edgeNGram`.
+A token filter of type `edge_ngram`.
 
-The following are settings that can be set for a `edgeNGram` token
+The following are settings that can be set for a `edge_ngram` token
 filter type:
 
 [cols="<,<",options="header",]
diff --git a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc
@@ -1,9 +1,9 @@
 [[analysis-ngram-tokenfilter]]
 === NGram Token Filter
 
-A token filter of type `nGram`.
+A token filter of type `ngram`.
 
-The following are settings that can be set for a `nGram` token filter
+The following are settings that can be set for a `ngram` token filter
 type:
 
 [cols="<,<",options="header",]
diff --git a/docs/reference/migration/migrate_7_0/analysis.asciidoc b/docs/reference/migration/migrate_7_0/analysis.asciidoc
@@ -38,4 +38,12 @@ The `standard` token filter has been removed because it doesn't change anything
 The `standard_html_strip` analyzer has been deprecated, and should be replaced
 with a combination of the `standard` tokenizer and `html_strip` char_filter.
 Indexes created using this analyzer will still be readable in elasticsearch 7.0,
-but it will not be possible to create new indexes using it.
+but it will not be possible to create new indexes using it.
+
+[float]
+==== The deprecated `nGram` and `edgeNGram` token filters cannot be used on new indices
+
+The `nGram` and `edgeNGram` token filter names have been deprecated since version 6.4.
+Indexes created using these token filters will still be readable in elasticsearch 7.0.
+Using them on new indices from version 7.0.0 on, however, will throw an error when indexing
+or analyzing documents. The names should be replaced by `ngram` and `edge_ngram` respectively.
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -415,7 +415,11 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
         filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input ->
                 new EdgeNGramTokenFilter(input, 1)));
         filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> {
-            if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
+            if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
+                throw new IllegalArgumentException(
+                        "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                                + "Please change the filter name to [edge_ngram] instead.");
+            } else if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
                 deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation",
                         "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
                                 + "Please change the filter name to [edge_ngram] instead.");
@@ -439,7 +443,10 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
                         LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
         filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
         filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> {
-            if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
+            if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
+                throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+                        + "Please change the filter name to [ngram] instead.");
+            } else if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
                 deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation",
                         "The [nGram] token filter name is deprecated and will be removed in a future version. "
                                 + "Please change the filter name to [ngram] instead.");
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java
@@ -45,7 +45,7 @@ public class CommonAnalysisPluginTests extends ESTestCase {
      */
     public void testNGramDeprecationWarning() throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
+                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.V_6_7_0))
                 .build();
 
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -61,6 +61,28 @@ public void testNGramDeprecationWarning() throws IOException {
         }
     }
 
+    /**
+     * Check that the deprecated name "nGram" throws an error for indices created since 7.0.0
+     */
+    public void testNGramDeprecationError() throws IOException {
+        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
+                .build();
+
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
+            TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
+            Tokenizer tokenizer = new MockTokenizer();
+            tokenizer.setReader(new StringReader("foo bar"));
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
+            assertEquals(
+                    "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
+                    + " name to [ngram] instead.",
+                    ex.getMessage());
+        }
+    }
+
     /**
      * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0
      */
@@ -80,12 +102,13 @@ public void testNGramNoDeprecationWarningPre6_4() throws IOException {
         }
     }
 
+
     /**
      * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0
      */
     public void testEdgeNGramDeprecationWarning() throws IOException {
         Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
-                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT))
+                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.V_6_7_0))
                 .build();
 
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@@ -101,6 +124,28 @@ public void testEdgeNGramDeprecationWarning() throws IOException {
         }
     }
 
+    /**
+     * Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
+     */
+    public void testEdgeNGramDeprecationError() throws IOException {
+        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
+                .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, null))
+                .build();
+
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
+            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter;
+            TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
+            Tokenizer tokenizer = new MockTokenizer();
+            tokenizer.setReader(new StringReader("foo bar"));
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
+            assertEquals(
+                    "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. Please change the filter"
+                    + " name to [edge_ngram] instead.",
+                    ex.getMessage());
+        }
+    }
+
     /**
      * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0
      */
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java
@@ -81,7 +81,7 @@ public void testNgramHighlightingWithBrokenPositions() throws IOException {
                         .put("analysis.tokenizer.autocomplete.max_gram", 20)
                         .put("analysis.tokenizer.autocomplete.min_gram", 1)
                         .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
-                        .put("analysis.tokenizer.autocomplete.type", "nGram")
+                        .put("analysis.tokenizer.autocomplete.type", "ngram")
                         .put("analysis.filter.wordDelimiter.type", "word_delimiter")
                         .putList("analysis.filter.wordDelimiter.type_table",
                                 "& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml
@@ -23,14 +23,14 @@
     - match:  { detail.tokenizer.tokens.0.token: Foo Bar! }
 
 ---
-"nGram":
+"ngram":
     - do:
         indices.analyze:
           body:
             text: good
             explain: true
             tokenizer:
-              type: nGram
+              type: ngram
               min_gram: 2
               max_gram: 2
     - length: { detail.tokenizer.tokens: 3 }
@@ -40,7 +40,7 @@
     - match:  { detail.tokenizer.tokens.2.token: od }
 
 ---
-"nGram_exception":
+"ngram_exception":
     - skip:
         version: " - 6.99.99"
         reason: only starting from version 7.x this throws an error
@@ -51,7 +51,7 @@
             text: good
             explain: true
             tokenizer:
-              type: nGram
+              type: ngram
               min_gram: 2
               max_gram: 4
 ---
@@ -133,7 +133,7 @@
             text: "foobar"
             explain: true
             tokenizer:
-              type: nGram
+              type: ngram
               min_gram: 3
               max_gram: 3
     - length: { detail.tokenizer.tokens: 4 }
@@ -162,9 +162,9 @@
           body:
             text: "foo"
             explain: true
-            tokenizer: nGram
+            tokenizer: ngram
     - length: { detail.tokenizer.tokens: 5 }
-    - match:  { detail.tokenizer.name: nGram }
+    - match:  { detail.tokenizer.name: ngram }
     - match:  { detail.tokenizer.tokens.0.token: f }
     - match:  { detail.tokenizer.tokens.1.token: fo }
     - match:  { detail.tokenizer.tokens.2.token: o }
@@ -194,7 +194,7 @@
             text: "foo"
             explain: true
             tokenizer:
-              type: edgeNGram
+              type: edge_ngram
               min_gram: 1
               max_gram: 3
     - length: { detail.tokenizer.tokens: 3 }
@@ -219,9 +219,9 @@
           body:
             text: "foo"
             explain: true
-            tokenizer: edgeNGram
+            tokenizer: edge_ngram
     - length: { detail.tokenizer.tokens: 2 }
-    - match:  { detail.tokenizer.name: edgeNGram }
+    - match:  { detail.tokenizer.name: edge_ngram }
     - match:  { detail.tokenizer.tokens.0.token: f }
     - match:  { detail.tokenizer.tokens.1.token: fo }
 
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml
@@ -76,7 +76,7 @@
                 analysis:
                   tokenizer:
                     trigram:
-                      type: nGram
+                      type: ngram
                       min_gram: 3
                       max_gram: 3
                   filter: