Add index_prefix option to text fields #28222

Closed
wants to merge 5 commits
org/elasticsearch/index/mapper/TextFieldMapper.java

@@ -19,10 +19,17 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -113,6 +120,11 @@ public Builder fielddataFrequencyFilter(double minFreq, double maxFreq, int minSegmentSize)
return builder;
}

/**
 * Indexes prefixes of each token between {@code minChars} and {@code maxChars}
 * (inclusive) as edge n-grams in a hidden prefix subfield, so that short
 * prefix queries can be answered with a single term lookup instead of a
 * multi-term scan.
 */
public Builder indexPrefixes(int minChars, int maxChars) {
fieldType().setIndexPrefixes(minChars, maxChars);
return builder;
}

@Override
public TextFieldMapper build(BuilderContext context) {
if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
@@ -161,18 +173,64 @@ public Mapper.Builder parse(String fieldName, Map<String, Object> node, ParserContext parserContext)
builder.fielddataFrequencyFilter(minFrequency, maxFrequency, minSegmentSize);
DocumentMapperParser.checkNoRemainingFields(propName, frequencyFilter, parserContext.indexVersionCreated());
iterator.remove();
} else if (propName.equals("index_prefix")) {
Map<?, ?> indexPrefix = (Map<?, ?>) propNode;
int minChars = XContentMapValues.nodeIntegerValue(indexPrefix.remove("min_chars"), 0);
int maxChars = XContentMapValues.nodeIntegerValue(indexPrefix.remove("max_chars"), 10);
builder.indexPrefixes(minChars, maxChars);
DocumentMapperParser.checkNoRemainingFields(propName, indexPrefix, parserContext.indexVersionCreated());
iterator.remove();
}
}
return builder;
}
}

private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {

static final String SUBFIELD = "..prefix";

private final int minChars;
private final int maxChars;
private final Analyzer delegate;

PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) {
super(delegate.getReuseStrategy());
this.delegate = delegate;
this.minChars = minChars;
this.maxChars = maxChars;
}

@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
}

@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars);
return new TokenStreamComponents(components.getTokenizer(), filter);
}

boolean accept(int length) {
return length >= minChars && length <= maxChars;
}

void doXContent(XContentBuilder builder) throws IOException {
builder.startObject("index_prefix");
builder.field("min_chars", minChars);
builder.field("max_chars", maxChars);
builder.endObject();
}
}
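
For context, the wrapped analyzer expands each token into its leading edge n-grams at index time: with min_chars = 1 and max_chars = 10, the token "going" is indexed as g, go, goi, goin, going. A minimal standalone sketch of the same expansion using Lucene directly (the WhitespaceAnalyzer and field name here are illustrative, not part of this change):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class EdgeNGramDemo {
    public static void main(String[] args) throws IOException {
        Analyzer base = new WhitespaceAnalyzer();
        // Same expansion the prefix subfield applies: emit 1..10 character prefixes.
        try (TokenStream ts = new EdgeNGramTokenFilter(base.tokenStream("f", "going"), 1, 10)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // prints: g, go, goi, goin, going
            }
            ts.end();
        }
    }
}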

public static final class TextFieldType extends StringFieldType {

private boolean fielddata;
private double fielddataMinFrequency;
private double fielddataMaxFrequency;
private int fielddataMinSegmentSize;
private PrefixWrappedAnalyzer prefixAnalyzer;

public TextFieldType() {
setTokenized(true);
@@ -273,11 +331,27 @@ public void setFielddataMinSegmentSize(int fielddataMinSegmentSize) {
this.fielddataMinSegmentSize = fielddataMinSegmentSize;
}

/**
 * Enables index-time prefixes by wrapping the field's index analyzer with an
 * edge n-gram filter. Note that the analyzer in effect when this is called is
 * the one that gets wrapped.
 */
public void setIndexPrefixes(int minChars, int maxChars) {
checkIfFrozen();
prefixAnalyzer = new PrefixWrappedAnalyzer(indexAnalyzer().analyzer(), minChars, maxChars);
}

@Override
public String typeName() {
return CONTENT_TYPE;
}

@Override
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
if (prefixAnalyzer == null || prefixAnalyzer.accept(value.length()) == false) {
return super.prefixQuery(value, method, context);
}
TermQuery q = new TermQuery(new Term(name() + PrefixWrappedAnalyzer.SUBFIELD, indexedValueForSearch(value)));
if (boost() != 1f) {
return new BoostQuery(q, boost());
}
return q;
}
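
The effect: a prefix whose length falls inside [min_chars, max_chars] is rewritten to a single term lookup on the hidden subfield, while anything outside that range falls back to the ordinary multi-term prefix query. A sketch of the two query shapes for a field mapped with min_chars = 1 and max_chars = 10 (the field name "title" is illustrative):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class PrefixQueryShapes {
    // "goin" (4 chars) is within [1, 10]: a cheap single-term lookup on the
    // edge n-gram subfield.
    static Query inRange() {
        return new TermQuery(new Term("title..prefix", "goin"));
    }

    // "internationalisatio" (19 chars) exceeds max_chars: the usual multi-term
    // expansion over the main field's term dictionary.
    static Query outOfRange() {
        return new PrefixQuery(new Term("title", "internationalisatio"));
    }
}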

@Override
public Query existsQuery(QueryShardContext context) {
if (omitNorms()) {
@@ -348,6 +422,10 @@ protected void parseCreateField(ParseContext context, List<IndexableField> fields)
if (fieldType().omitNorms()) {
createFieldNamesField(context, fields);
}
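// Additionally index the raw value into the hidden prefix subfield; the
// wrapped analyzer applies the edge n-gram expansion at index time.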
if (fieldType().prefixAnalyzer != null) {
String prefixFieldName = fieldType().name() + PrefixWrappedAnalyzer.SUBFIELD;
fields.add(new TextField(prefixFieldName, fieldType().prefixAnalyzer.tokenStream(prefixFieldName, value)));
}
}
}

@@ -396,5 +474,8 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
builder.endObject();
}
}
if (fieldType().prefixAnalyzer != null) {
fieldType().prefixAnalyzer.doXContent(builder);
}
}
}
org/elasticsearch/index/mapper/TextFieldMapperTests.java

@@ -25,7 +25,12 @@
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.compress.CompressedXContent;
@@ -39,6 +44,7 @@
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -52,6 +58,7 @@
import java.util.HashMap;
import java.util.Map;

import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

@@ -584,4 +591,52 @@ public void testEmptyName() throws IOException {
);
assertThat(e.getMessage(), containsString("name cannot be empty string"));
}

public void testIndexPrefixMapping() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.startObject("index_prefix")
.field("min_chars", 1)
.field("max_chars", 10)
.endObject()
.endObject().endObject()
.endObject().endObject().string();

DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());

QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null);
Query q = mapper.mappers().getMapper("field").fieldType().prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext);
assertEquals(new TermQuery(new Term("field..prefix", "goin")), q);
q = mapper.mappers().getMapper("field").fieldType().prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext);
assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);

ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("field", "Some English text that is going to be very useful")
.endObject()
.bytes(),
XContentType.JSON));

IndexableField[] fields = doc.rootDoc().getFields("field..prefix");
assertEquals(1, fields.length);

String illegalMapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.startObject("fields")
.startObject("field..prefix").field("type", "text").endObject()
.endObject()
.endObject().endObject()
.endObject().endObject().string();

MapperParsingException e = expectThrows(MapperParsingException.class,
() -> parser.parse("type", new CompressedXContent(illegalMapping))
);
assertThat(e.getMessage(), containsString("cannot contain '.'"));
}
}