From 794e00b8806dee1b40ec1fce90f2bc6e4081b7dc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Jan 2018 13:45:57 +0000 Subject: [PATCH 1/4] Add index_prefix option to text fields --- .../index/analysis/PrefixWrappedAnalyzer.java | 64 +++++++++++++++++++ .../index/mapper/TextFieldMapper.java | 44 ++++++++++++- .../index/mapper/TextFieldMapperTests.java | 41 ++++++++++++ 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java b/core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java new file mode 100644 index 0000000000000..5680496a5e472 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java @@ -0,0 +1,64 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +public class PrefixWrappedAnalyzer extends AnalyzerWrapper { + + private final int minChars; + private final int maxChars; + private final Analyzer delegate; + + public PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) { + super(delegate.getReuseStrategy()); + this.delegate = delegate; + this.minChars = minChars; + this.maxChars = maxChars; + } + + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return delegate; + } + + @Override + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars); + return new TokenStreamComponents(components.getTokenizer(), filter); + } + + public boolean accept(int length) { + return length >= minChars && length <= maxChars; + } + + public void doXContent(XContentBuilder builder) throws IOException { + builder.startObject("index_prefix"); + builder.field("min_chars", minChars); + builder.field("max_chars", maxChars); + builder.endObject(); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index ae99f743fe57f..582c56d8f44ae 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -19,17 +19,23 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.PrefixWrappedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.QueryShardContext; @@ -113,6 +119,11 @@ public Builder fielddataFrequencyFilter(double minFreq, double maxFreq, int minS return builder; } + public Builder indexPrefixes(int minChars, int maxChars) { + fieldType().setIndexPrefixes(minChars, maxChars); + return builder; + } + @Override public TextFieldMapper build(BuilderContext context) { if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) { @@ -161,18 +172,26 @@ public Mapper.Builder parse(String fieldName, Map node, ParserCo builder.fielddataFrequencyFilter(minFrequency, maxFrequency, minSegmentSize); DocumentMapperParser.checkNoRemainingFields(propName, frequencyFilter, parserContext.indexVersionCreated()); iterator.remove(); + } else if (propName.equals("index_prefix")) { + Map indexPrefix = (Map) propNode; + int minChars = XContentMapValues.nodeIntegerValue(indexPrefix.remove("min_chars"), 0); + int maxChars = XContentMapValues.nodeIntegerValue(indexPrefix.remove("max_chars"), 10); + builder.indexPrefixes(minChars, maxChars); + DocumentMapperParser.checkNoRemainingFields(propName, indexPrefix, parserContext.indexVersionCreated()); + iterator.remove(); } } return builder; } } - public static final class TextFieldType extends StringFieldType { + public final static class TextFieldType extends StringFieldType { private boolean fielddata; private double fielddataMinFrequency; private double fielddataMaxFrequency; private int fielddataMinSegmentSize; + private PrefixWrappedAnalyzer prefixAnalyzer; public TextFieldType() { setTokenized(true); @@ -273,11 +292,27 @@ public void setFielddataMinSegmentSize(int fielddataMinSegmentSize) { this.fielddataMinSegmentSize = fielddataMinSegmentSize; } + public void setIndexPrefixes(int minChars, int maxChars) { + checkIfFrozen(); + prefixAnalyzer = new PrefixWrappedAnalyzer(indexAnalyzer().analyzer(), minChars, maxChars); + } + @Override public String typeName() { return CONTENT_TYPE; } + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (prefixAnalyzer == null || prefixAnalyzer.accept(value.length()) == false) { + return super.prefixQuery(value, method, context); + } + TermQuery q = new TermQuery(new Term(name() + "._prefix", indexedValueForSearch(value))); + if (boost() != 1f) + return new BoostQuery(q, boost()); + return q; + } + @Override public Query existsQuery(QueryShardContext context) { if (omitNorms()) { @@ -348,6 +383,10 @@ protected void parseCreateField(ParseContext context, List field if (fieldType().omitNorms()) { createFieldNamesField(context, fields); } + if (fieldType().prefixAnalyzer != null) { + String prefixFieldName = fieldType().name() + "._prefix"; + fields.add(new TextField(prefixFieldName, fieldType().prefixAnalyzer.tokenStream(prefixFieldName, value))); + } } } @@ -396,5 +435,8 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, builder.endObject(); } } + if (fieldType().prefixAnalyzer != null) { + fieldType().prefixAnalyzer.doXContent(builder); + } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 3f0f4a87792d3..bd1064d051279 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -25,7 +25,12 @@ import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.common.compress.CompressedXContent; @@ -39,6 +44,7 @@ import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -52,6 +58,7 @@ import java.util.HashMap; import java.util.Map; +import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -584,4 +591,38 @@ public void testEmptyName() throws IOException { ); assertThat(e.getMessage(), containsString("name cannot be empty string")); } + + public void testIndexPrefixMapping() throws IOException { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .startObject("index_prefix") + .field("min_chars", 1) + .field("max_chars", 10) + .endObject() + .endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + assertEquals(mapping, mapper.mappingSource().toString()); + + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null); + Query q = mapper.mappers().getMapper("field").fieldType().prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext); + assertEquals(new TermQuery(new Term("field._prefix", "goin")), q); + q = mapper.mappers().getMapper("field").fieldType().prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext); + assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q); + + ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("field", "Some English text that is going to be very useful") + .endObject() + .bytes(), + XContentType.JSON)); + + IndexableField[] fields = doc.rootDoc().getFields("field._prefix"); + assertEquals(1, fields.length); + + } } From 592f501859def1fd3f116ed1c684e8bf744e240b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Jan 2018 14:18:56 +0000 Subject: [PATCH 2/4] Move PrefixWrappedAnalyzer into private class --- .../index/analysis/PrefixWrappedAnalyzer.java | 64 ------------------- .../index/mapper/TextFieldMapper.java | 41 +++++++++++- 2 files changed, 39 insertions(+), 66 deletions(-) delete mode 100644 core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java b/core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java deleted file mode 100644 index 5680496a5e472..0000000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/PrefixWrappedAnalyzer.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.AnalyzerWrapper; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; -import org.elasticsearch.common.xcontent.XContentBuilder; - -import java.io.IOException; - -public class PrefixWrappedAnalyzer extends AnalyzerWrapper { - - private final int minChars; - private final int maxChars; - private final Analyzer delegate; - - public PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) { - super(delegate.getReuseStrategy()); - this.delegate = delegate; - this.minChars = minChars; - this.maxChars = maxChars; - } - - @Override - protected Analyzer getWrappedAnalyzer(String fieldName) { - return delegate; - } - - @Override - protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { - TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars); - return new TokenStreamComponents(components.getTokenizer(), filter); - } - - public boolean accept(int length) { - return length >= minChars && length <= maxChars; - } - - public void doXContent(XContentBuilder builder) throws IOException { - builder.startObject("index_prefix"); - builder.field("min_chars", minChars); - builder.field("max_chars", maxChars); - builder.endObject(); - } -} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 582c56d8f44ae..c1625eb363863 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -20,6 +20,9 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexOptions; @@ -33,9 +36,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; -import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.analysis.PrefixWrappedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.QueryShardContext; @@ -185,6 +186,42 @@ public Mapper.Builder parse(String fieldName, Map node, ParserCo } } + private static class PrefixWrappedAnalyzer extends AnalyzerWrapper { + + private final int minChars; + private final int maxChars; + private final Analyzer delegate; + + PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) { + super(delegate.getReuseStrategy()); + this.delegate = delegate; + this.minChars = minChars; + this.maxChars = maxChars; + } + + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return delegate; + } + + @Override + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars); + return new TokenStreamComponents(components.getTokenizer(), filter); + } + + public boolean accept(int length) { + return length >= minChars && length <= maxChars; + } + + public void doXContent(XContentBuilder builder) throws IOException { + builder.startObject("index_prefix"); + builder.field("min_chars", minChars); + builder.field("max_chars", maxChars); + builder.endObject(); + } + } + public final static class TextFieldType extends StringFieldType { private boolean fielddata; From 0bf3370184c208ab113ebda61d46bc9296691278 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Jan 2018 15:36:31 +0000 Subject: [PATCH 3/4] checkstyle --- .../java/org/elasticsearch/index/mapper/TextFieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index c1625eb363863..be5978fd0e272 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -222,7 +222,7 @@ public void doXContent(XContentBuilder builder) throws IOException { } } - public final static class TextFieldType extends StringFieldType { + public static final class TextFieldType extends StringFieldType { private boolean fielddata; private double fielddataMinFrequency; From f7f5b8430fdb7e70bcd7d3d870f484bdb20838dd Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 16 Jan 2018 10:02:00 +0000 Subject: [PATCH 4/4] Use double-dot fieldname to prevent mapping clashes --- .../index/mapper/TextFieldMapper.java | 10 ++++++---- .../index/mapper/TextFieldMapperTests.java | 18 ++++++++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index be5978fd0e272..f9774c5178bb5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -188,6 +188,8 @@ public Mapper.Builder parse(String fieldName, Map node, ParserCo private static class PrefixWrappedAnalyzer extends AnalyzerWrapper { + static final String SUBFIELD = "..prefix"; + private final int minChars; private final int maxChars; private final Analyzer delegate; @@ -210,11 +212,11 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp return new TokenStreamComponents(components.getTokenizer(), filter); } - public boolean accept(int length) { + boolean accept(int length) { return length >= minChars && length <= maxChars; } - public void doXContent(XContentBuilder builder) throws IOException { + void doXContent(XContentBuilder builder) throws IOException { builder.startObject("index_prefix"); builder.field("min_chars", minChars); builder.field("max_chars", maxChars); @@ -344,7 +346,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer if (prefixAnalyzer == null || prefixAnalyzer.accept(value.length()) == false) { return super.prefixQuery(value, method, context); } - TermQuery q = new TermQuery(new Term(name() + "._prefix", indexedValueForSearch(value))); + TermQuery q = new TermQuery(new Term(name() + PrefixWrappedAnalyzer.SUBFIELD, indexedValueForSearch(value))); if (boost() != 1f) return new BoostQuery(q, boost()); return q; @@ -421,7 +423,7 @@ protected void parseCreateField(ParseContext context, List field createFieldNamesField(context, fields); } if (fieldType().prefixAnalyzer != null) { - String prefixFieldName = fieldType().name() + "._prefix"; + String prefixFieldName = fieldType().name() + PrefixWrappedAnalyzer.SUBFIELD; fields.add(new TextField(prefixFieldName, fieldType().prefixAnalyzer.tokenStream(prefixFieldName, value))); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index bd1064d051279..a12a3d8ba0770 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -610,7 +610,7 @@ public void testIndexPrefixMapping() throws IOException { QueryShardContext queryShardContext = indexService.newQueryShardContext( randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null); Query q = mapper.mappers().getMapper("field").fieldType().prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext); - assertEquals(new TermQuery(new Term("field._prefix", "goin")), q); + assertEquals(new TermQuery(new Term("field..prefix", "goin")), q); q = mapper.mappers().getMapper("field").fieldType().prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext); assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q); @@ -621,8 +621,22 @@ public void testIndexPrefixMapping() throws IOException { .bytes(), XContentType.JSON)); - IndexableField[] fields = doc.rootDoc().getFields("field._prefix"); + IndexableField[] fields = doc.rootDoc().getFields("field..prefix"); assertEquals(1, fields.length); + String illegalMapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field") + .field("type", "text") + .field("analyzer", "english") + .startObject("fields") + .startObject("field..prefix").field("type", "text").endObject() + .endObject() + .endObject().endObject() + .endObject().endObject().string(); + + MapperParsingException e = expectThrows(MapperParsingException.class, + () -> parser.parse("type", new CompressedXContent(illegalMapping)) + ); + assertThat(e.getMessage(), containsString("cannot contain '.'")); } }