Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add doc values support for JSON fields. #40069

Merged
merged 17 commits into from
Apr 1, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions docs/reference/mapping/types/json.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ the following advantages:

However, `json` fields present a trade-off in terms of search functionality.
Only basic queries are allowed, with no support for numeric range queries or
aggregations. Further information on the limitations can be found in the
highlighting. Further information on the limitations can be found in the
<<supported-operations, Supported operations>> section.

NOTE: The `json` mapping type should **not** be used for indexing all JSON
Expand Down Expand Up @@ -107,9 +107,13 @@ Currently, `json` fields can be used with the following query types:

When querying, it is not possible to refer to field keys using wildcards, as in
`{ "term": {"labels.time*": 1541457010}}`. Note that all queries, including
`range`, treat the values as string keywords.
`range`, treat the values as string keywords. Highlighting is not supported on
`json` fields.

Aggregating, highlighting, or sorting on a `json` field is not supported.
It is possible to sort on a `json` field, as well as perform simple
keyword-style aggregations such as `terms`. As with queries, there is no
special support for numerics -- all values in the JSON object are treated as
keywords. When sorting, this implies that values are compared lexicographically.

Finally, because of the way leaf values are stored in the index, the null
character `\0` is not allowed to appear in the keys of the JSON object.
Expand Down Expand Up @@ -154,6 +158,18 @@ parameters are accepted:
objects. If a JSON field exceeds this limit, then an error will be
thrown. Defaults to `20`.

<<doc-values,`doc_values`>>::

Should the field be stored on disk in a column-stride fashion, so that it
can later be used for sorting, aggregations, or scripting? Accepts `true`
(default) or `false`.

<<eager-global-ordinals,`eager_global_ordinals`>>::

Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false`
(default). Enabling this is a good idea on fields that are frequently used for
terms aggregations.

<<ignore-above,`ignore_above`>>::

Leaf values longer than this limit will not be indexed. By default, there
Expand Down
190 changes: 158 additions & 32 deletions server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
Expand All @@ -39,9 +43,21 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
import org.elasticsearch.index.fielddata.plain.SortedSetDVOrdinalsIndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;

import java.io.IOException;
import java.util.Iterator;
Expand Down Expand Up @@ -93,7 +109,7 @@ private static class Defaults {
static {
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(false);
FIELD_TYPE.setHasDocValues(false);
FIELD_TYPE.setHasDocValues(true);
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
Expand Down Expand Up @@ -127,14 +143,6 @@ public Builder indexOptions(IndexOptions indexOptions) {
return super.indexOptions(indexOptions);
}

@Override
public Builder docValues(boolean docValues) {
if (docValues) {
throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields do not support doc values");
}
return super.docValues(docValues);
}

public Builder depthLimit(int depthLimit) {
if (depthLimit < 0) {
throw new IllegalArgumentException("[depth_limit] must be positive, got " + depthLimit);
Expand All @@ -143,6 +151,11 @@ public Builder depthLimit(int depthLimit) {
return this;
}

/**
 * Configures the {@code eager_global_ordinals} mapping parameter for this field.
 *
 * The flag is forwarded unchanged to the field type via
 * {@code setEagerGlobalOrdinals}; no validation is performed here.
 *
 * @param eagerGlobalOrdinals the value to set on the field type
 * @return the {@code builder} reference, to allow call chaining
 */
public Builder eagerGlobalOrdinals(boolean eagerGlobalOrdinals) {
fieldType().setEagerGlobalOrdinals(eagerGlobalOrdinals);
// NOTE(review): returns the 'builder' field rather than 'this'; the field is declared
// outside this view -- presumably it refers to this same builder, confirm in the parent class.
return builder;
}

public Builder ignoreAbove(int ignoreAbove) {
if (ignoreAbove < 0) {
throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
Expand All @@ -166,11 +179,6 @@ public Builder copyTo(CopyTo copyTo) {
throw new UnsupportedOperationException("[copy_to] is not supported for [" + CONTENT_TYPE + "] fields.");
}

@Override
protected boolean defaultDocValues(Version indexCreated) {
return false;
}

@Override
public JsonFieldMapper build(BuilderContext context) {
setupFieldType(context);
Expand All @@ -194,6 +202,9 @@ public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserCo
if (propName.equals("depth_limit")) {
builder.depthLimit(XContentMapValues.nodeIntegerValue(propNode, -1));
iterator.remove();
} else if (propName.equals("eager_global_ordinals")) {
builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode, "eager_global_ordinals"));
iterator.remove();
} else if (propName.equals("ignore_above")) {
builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1));
iterator.remove();
Expand Down Expand Up @@ -221,7 +232,7 @@ public static final class KeyedJsonFieldType extends StringFieldType {
private final String key;
private boolean splitQueriesOnWhitespace;

KeyedJsonFieldType(String key) {
public KeyedJsonFieldType(String key) {
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
this.key = key;
Expand Down Expand Up @@ -323,6 +334,7 @@ public Query wildcardQuery(String value,
CONTENT_TYPE + "] fields.");
}

@Override
public BytesRef indexedValueForSearch(Object value) {
if (value == null) {
return null;
Expand All @@ -334,6 +346,108 @@ public BytesRef indexedValueForSearch(Object value) {
String keyedValue = JsonFieldParser.createKeyedValue(key, stringValue);
return new BytesRef(keyedValue);
}

/**
 * Returns a field data builder that is scoped to this field type's JSON key.
 *
 * {@code failIfNoDocValues()} is invoked first, so the builder is only handed out
 * when that check passes (presumably it throws when doc values are disabled --
 * the method is defined outside this view).
 *
 * @param fullyQualifiedIndexName not used by this implementation
 * @return a {@link KeyedJsonIndexFieldData.Builder} constructed with this type's key
 */
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
failIfNoDocValues();
return new KeyedJsonIndexFieldData.Builder(key);
}
}

/**
 * Field data that exposes only the values stored under one particular JSON key.
 *
 * An instance wraps the field data built directly on the keyed JSON field and
 * filters out values whose prefix doesn't match the requested key. All loading and
 * caching is handed off to the wrapped instance, so that different
 * {@link KeyedJsonIndexFieldData} for the same JSON field share the same global ordinals.
 */
public static class KeyedJsonIndexFieldData implements IndexOrdinalsFieldData {
    private final String key;
    private final IndexOrdinalsFieldData delegate;

    private KeyedJsonIndexFieldData(String key, IndexOrdinalsFieldData delegate) {
        this.key = key;
        this.delegate = delegate;
    }

    /** Returns the JSON key this field data is restricted to. */
    public String getKey() {
        return key;
    }

    /** Returns the field name reported by the wrapped field data. */
    @Override
    public String getFieldName() {
        return delegate.getFieldName();
    }

    /**
     * Creates a sort field backed by a bytes-ref comparator over this (key-filtered)
     * field data, named after the wrapped field.
     */
    @Override
    public SortField sortField(Object missingValue,
                               MultiValueMode sortMode,
                               XFieldComparatorSource.Nested nested,
                               boolean reverse) {
        XFieldComparatorSource comparatorSource =
            new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
        String sortFieldName = getFieldName();
        return new SortField(sortFieldName, comparatorSource, reverse);
    }

    /** Clears the wrapped field data. */
    @Override
    public void clear() {
        delegate.clear();
    }

    /** Loads the segment-level field data from the delegate and wraps it for this key. */
    @Override
    public AtomicOrdinalsFieldData load(LeafReaderContext context) {
        return new KeyedJsonAtomicFieldData(key, delegate.load(context));
    }

    /** Same as {@link #load(LeafReaderContext)}, but through the delegate's {@code loadDirect}. */
    @Override
    public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
        return new KeyedJsonAtomicFieldData(key, delegate.loadDirect(context));
    }

    /** Loads global field data from the delegate and re-wraps it with the same key. */
    @Override
    public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
        return new KeyedJsonIndexFieldData(key, delegate.loadGlobal(indexReader));
    }

    /** Same as {@link #loadGlobal(DirectoryReader)}, but through the delegate's {@code localGlobalDirect}. */
    @Override
    public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
        return new KeyedJsonIndexFieldData(key, delegate.localGlobalDirect(indexReader));
    }

    /** Returns the ordinal map of the wrapped field data. */
    @Override
    public OrdinalMap getOrdinalMap() {
        return delegate.getOrdinalMap();
    }

    /** Returns the index of the wrapped field data. */
    @Override
    public Index index() {
        return delegate.index();
    }

    /**
     * Builder producing a {@link KeyedJsonIndexFieldData} for a fixed key on top of
     * sorted-set doc-values ordinals field data.
     */
    public static class Builder implements IndexFieldData.Builder {
        private final String key;

        Builder(String key) {
            this.key = key;
        }

        @Override
        public IndexFieldData<?> build(IndexSettings indexSettings,
                                       MappedFieldType fieldType,
                                       IndexFieldDataCache cache,
                                       CircuitBreakerService breakerService,
                                       MapperService mapperService) {
            // The wrapped field data is built on the field type's own name; the key is
            // only applied by the KeyedJsonIndexFieldData wrapper around it.
            IndexOrdinalsFieldData wrapped = new SortedSetDVOrdinalsIndexFieldData(
                indexSettings,
                cache,
                fieldType.name(),
                breakerService,
                AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
            return new KeyedJsonIndexFieldData(key, wrapped);
        }
    }
}

/**
Expand Down Expand Up @@ -396,7 +510,11 @@ public Object valueForDisplay(Object value) {

@Override
public Query existsQuery(QueryShardContext context) {
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
if (hasDocValues()) {
return new DocValuesFieldExistsQuery(name());
} else {
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not related to this pr but KeyedJsonFieldType#existsQuery should also use the _field_names field instead of a prefix query ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I will give this some thought and follow up in a separate issue/ PR.

}
}

@Override
Expand All @@ -420,6 +538,12 @@ public Query wildcardQuery(String value,
throw new UnsupportedOperationException("[wildcard] queries are not currently supported on [" +
CONTENT_TYPE + "] fields.");
}

/**
 * Returns the field data builder for this field type.
 *
 * {@code failIfNoDocValues()} is invoked first, so the builder is only handed out
 * when that check passes (presumably it throws when doc values are disabled --
 * the method is defined outside this view).
 *
 * @param fullyQualifiedIndexName not used by this implementation
 * @return a new {@link DocValuesIndexFieldData.Builder}
 */
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
failIfNoDocValues();
return new DocValuesIndexFieldData.Builder();
}
}

private final JsonFieldParser fieldParser;
Expand All @@ -438,7 +562,7 @@ private JsonFieldMapper(String simpleName,
this.depthLimit = depthLimit;
this.ignoreAbove = ignoreAbove;
this.fieldParser = new JsonFieldParser(fieldType.name(), keyedFieldName(),
depthLimit, ignoreAbove, fieldType.nullValueAsString());
fieldType, depthLimit, ignoreAbove);
}

@Override
Expand Down Expand Up @@ -476,7 +600,9 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
return;
}

if (fieldType.indexOptions() == IndexOptions.NONE && !fieldType.stored()) {
if (fieldType.indexOptions() == IndexOptions.NONE
&& !fieldType.hasDocValues()
&& !fieldType.stored()) {
context.parser().skipChildren();
return;
}
Expand All @@ -490,22 +616,22 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
fields.add(new StoredField(fieldType.name(), storedValue));
}

if (fieldType().indexOptions() != IndexOptions.NONE) {
XContentParser indexedFieldsParser = context.parser();
XContentParser indexedFieldsParser = context.parser();

// If store is enabled, we've already consumed the content to produce the stored field. Here we
// 'reset' the parser, so that we can traverse the content again.
if (storedValue != null) {
indexedFieldsParser = JsonXContent.jsonXContent.createParser(context.parser().getXContentRegistry(),
context.parser().getDeprecationHandler(),
storedValue.bytes);
indexedFieldsParser.nextToken();
}

fields.addAll(fieldParser.parse(indexedFieldsParser));
// If store is enabled, we've already consumed the content to produce the stored field. Here we
// 'reset' the parser, so that we can traverse the content again.
if (storedValue != null) {
indexedFieldsParser = JsonXContent.jsonXContent.createParser(context.parser().getXContentRegistry(),
context.parser().getDeprecationHandler(),
storedValue.bytes);
indexedFieldsParser.nextToken();
}

createFieldNamesField(context, fields);
fields.addAll(fieldParser.parse(indexedFieldsParser));

if (!fieldType.hasDocValues()) {
createFieldNamesField(context, fields);
}
}

@Override
Expand Down
Loading