Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add new extensible method to DocRequest to specify type ([#19313](https://github.com/opensearch-project/OpenSearch/pull/19313))
- [Rule based auto-tagging] Add Rule based auto-tagging IT ([#18550](https://github.com/opensearch-project/OpenSearch/pull/18550))
- Add all-active ingestion as docrep equivalent in pull-based ingestion ([#19316](https://github.com/opensearch-project/OpenSearch/pull/19316))
- Add skip_list param for date, scaled float and token count fields ([#19142](https://github.com/opensearch-project/OpenSearch/pull/19142))

### Changed
- Refactor `if-else` chains to use `Java 17 pattern matching switch expressions` ([#18965](https://github.com/opensearch-project/OpenSearch/pull/18965))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
(n, c, o) -> o == null ? null : XContentMapValues.nodeDoubleValue(o),
m -> toType(m).nullValue
).acceptsNull();

private final Parameter<Boolean> skiplist = new Parameter<>(
"skip_list",
false,
() -> false,
(n, c, o) -> XContentMapValues.nodeBooleanValue(o),
m -> toType(m).skiplist
);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name, Settings settings) {
Expand Down Expand Up @@ -148,7 +154,7 @@ Builder nullValue(double nullValue) {

@Override
protected List<Parameter<?>> getParameters() {
return Arrays.asList(indexed, hasDocValues, stored, ignoreMalformed, meta, scalingFactor, coerce, nullValue);
return Arrays.asList(indexed, hasDocValues, stored, ignoreMalformed, meta, scalingFactor, coerce, nullValue, skiplist);
}

@Override
Expand All @@ -158,6 +164,7 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
indexed.getValue(),
stored.getValue(),
hasDocValues.getValue(),
skiplist.getValue(),
meta.getValue(),
scalingFactor.getValue(),
nullValue.getValue()
Expand All @@ -182,23 +189,27 @@ public static final class ScaledFloatFieldType extends SimpleMappedFieldType imp

private final double scalingFactor;
private final Double nullValue;
private final boolean skiplist;

public ScaledFloatFieldType(
String name,
boolean indexed,
boolean stored,
boolean hasDocValues,
boolean skiplist,
Map<String, String> meta,
double scalingFactor,
Double nullValue
) {
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
this.skiplist = skiplist;
this.scalingFactor = scalingFactor;
this.nullValue = nullValue;
}

public ScaledFloatFieldType(String name, double scalingFactor) {
this(name, true, false, true, Collections.emptyMap(), scalingFactor, null);
// TODO: enable skiplist by default
this(name, true, false, true, false, Collections.emptyMap(), scalingFactor, null);
}

@Override
Expand Down Expand Up @@ -384,9 +395,9 @@ public double toDoubleValue(long value) {
private final boolean indexed;
private final boolean hasDocValues;
private final boolean stored;
private final boolean skiplist;
private final Double nullValue;
private final double scalingFactor;

private final boolean ignoreMalformedByDefault;
private final boolean coerceByDefault;

Expand All @@ -401,6 +412,7 @@ private ScaledFloatFieldMapper(
this.indexed = builder.indexed.getValue();
this.hasDocValues = builder.hasDocValues.getValue();
this.stored = builder.stored.getValue();
this.skiplist = builder.skiplist.getValue();
this.scalingFactor = builder.scalingFactor.getValue();
this.nullValue = builder.nullValue.getValue();
this.ignoreMalformed = builder.ignoreMalformed.getValue();
Expand Down Expand Up @@ -491,7 +503,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
scaledValue,
indexed,
hasDocValues,
false,
skiplist,
stored
);
context.doc().addAll(fields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.util.List;
import java.util.Map;

import static org.opensearch.common.xcontent.support.XContentMapValues.nodeBooleanValue;
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeIntegerValue;

/**
Expand Down Expand Up @@ -77,6 +78,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
m -> toType(m).enablePositionIncrements,
true
);
private final Parameter<Boolean> skiplist = new Parameter<>(
"skip_list",
false,
() -> false,
(n, c, o) -> nodeBooleanValue(o),
m -> toType(m).skiplist
);

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

Expand All @@ -86,7 +94,7 @@ public Builder(String name) {

@Override
protected List<Parameter<?>> getParameters() {
return Arrays.asList(index, hasDocValues, store, analyzer, nullValue, enablePositionIncrements, meta);
return Arrays.asList(index, hasDocValues, store, analyzer, nullValue, enablePositionIncrements, meta, skiplist);
}

@Override
Expand All @@ -99,6 +107,7 @@ public TokenCountFieldMapper build(BuilderContext context) {
index.getValue(),
store.getValue(),
hasDocValues.getValue(),
skiplist.getValue(),
nullValue.getValue(),
meta.getValue()
);
Expand All @@ -113,10 +122,11 @@ static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType {
boolean isSearchable,
boolean isStored,
boolean hasDocValues,
boolean skiplist,
Number nullValue,
Map<String, String> meta
) {
super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, false, nullValue, meta);
super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, skiplist, false, nullValue, meta);
}

@Override
Expand All @@ -132,6 +142,7 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL

private final boolean index;
private final boolean hasDocValues;
private final boolean skiplist;
private final boolean store;
private final NamedAnalyzer analyzer;
private final boolean enablePositionIncrements;
Expand All @@ -150,6 +161,7 @@ protected TokenCountFieldMapper(
this.nullValue = builder.nullValue.getValue();
this.index = builder.index.getValue();
this.hasDocValues = builder.hasDocValues.getValue();
this.skiplist = builder.skiplist.getValue();
this.store = builder.store.getValue();
}

Expand All @@ -174,7 +186,9 @@ protected void parseCreateField(ParseContext context) throws IOException {
}

context.doc()
.addAll(NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, false, store));
.addAll(
NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, skiplist, store)
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
Expand Down Expand Up @@ -85,6 +86,7 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
b.field("scaling_factor", 5.0);
}));
checker.registerConflictCheck("doc_values", b -> b.field("doc_values", false));
checker.registerConflictCheck("skip_list", b -> b.field("skip_list", true));
checker.registerConflictCheck("index", b -> b.field("index", false));
checker.registerConflictCheck("store", b -> b.field("store", true));
checker.registerConflictCheck("null_value", b -> b.field("null_value", 1));
Expand Down Expand Up @@ -489,4 +491,37 @@ public void testScaledFloatEncodePoint() {
assertEquals(1051, decodedUp); // 10.5 scaled = 1050, then +1 = 1051 (represents 10.51)
assertEquals(1049, decodedDown); // 10.5 scaled = 1050, then -1 = 1049 (represents 10.49)
}

/**
 * Verifies the {@code skip_list} mapping parameter on {@code scaled_float} fields:
 * the default (no skip index), explicitly enabling it, and rejection of non-boolean values.
 */
public void testSkiplistParameter() throws IOException {
    // Default mapping: skip_list is off, so the doc-values field carries no skip index (NONE).
    DocumentMapper defaultMapper = createDocumentMapper(
        fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 100))
    );
    ParsedDocument doc = defaultMapper.parse(source(b -> b.field("field", 123.45)));
    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length); // point field + doc values field
    // NOTE(review): assumes the doc-values field is emitted second by createFields — confirm ordering is guaranteed
    IndexableField dvField = fields[1];
    assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
    assertEquals(DocValuesSkipIndexType.NONE, dvField.fieldType().docValuesSkipIndexType());

    // skip_list = true: the doc-values field must advertise a RANGE skip index.
    DocumentMapper skiplistMapper = createDocumentMapper(
        fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 100).field("skip_list", "true"))
    );
    doc = skiplistMapper.parse(source(b -> b.field("field", 123.45)));
    fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);
    dvField = fields[1];
    assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
    assertEquals(DocValuesSkipIndexType.RANGE, dvField.fieldType().docValuesSkipIndexType());

    // A non-boolean skip_list value must be rejected at mapping-parse time.
    MapperParsingException e = expectThrows(
        MapperParsingException.class,
        () -> createDocumentMapper(
            fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 100).field("skip_list", "invalid"))
        )
    );
    assertThat(e.getMessage(), containsString("Failed to parse value [invalid] as only [true] or [false] are allowed"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.index.mapper;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
Expand Down Expand Up @@ -82,6 +83,79 @@ public void testTermsQuery() {
assertEquals(LongField.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null));
}

/**
 * Checks that range queries on a {@code scaled_float} field match exactly the same documents
 * as equivalent {@code double} range queries, once the query bounds are rounded with the same
 * scaling-factor logic used at index time. This confirms that the precision loss of scaled
 * floats occurs only at index time, not at search time.
 */
public void testRangeQuery() throws IOException {
    ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType(
        "scaled_float",
        true,
        false,
        false,
        false,
        Collections.emptyMap(),
        0.1 + randomDouble() * 100, // random strictly-positive scaling factor
        null
    );
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
    final int numDocs = 1000;
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        double value = (randomDouble() * 2 - 1) * 10000;
        // Index the scaled long and the equivalently-rounded double side by side so that
        // both fields represent exactly the same effective value.
        long scaledValue = Math.round(value * ft.getScalingFactor());
        double rounded = scaledValue / ft.getScalingFactor();
        doc.add(new LongPoint("scaled_float", scaledValue));
        doc.add(new DoublePoint("double", rounded));
        w.addDocument(doc);
    }
    final DirectoryReader reader = DirectoryReader.open(w);
    w.close();
    IndexSearcher searcher = newSearcher(reader);
    final int numQueries = 1000;
    for (int i = 0; i < numQueries; ++i) {
        Double l = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000;
        Double u = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000;
        boolean includeLower = randomBoolean();
        boolean includeUpper = randomBoolean();

        // Round the query bounds exactly the way the values were rounded at index time,
        // so the double query and the scaled_float query see identical effective bounds.
        Double queryL = l;
        Double queryU = u;
        if (l != null) {
            long scaledL = Math.round(l * ft.getScalingFactor());
            queryL = scaledL / ft.getScalingFactor();
        }
        if (u != null) {
            long scaledU = Math.round(u * ft.getScalingFactor());
            queryU = scaledU / ft.getScalingFactor();
        }

        Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery(
            "double",
            queryL,
            queryU,
            includeLower,
            includeUpper,
            false,
            true,
            MOCK_QSC
        );
        Query scaledFloatQ = ft.rangeQuery(queryL, queryU, includeLower, includeUpper, MOCK_QSC);
        // Both queries must match exactly the same set of documents.
        int expectedCount = searcher.count(doubleQ);
        int scaledCount = searcher.count(scaledFloatQ);
        assertEquals(expectedCount, scaledCount);
    }
    IOUtils.close(reader, dir);
}

public void testRoundsUpperBoundCorrectly() {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 100);
Query scaledFloatQ = ft.rangeQuery(null, 0.1, true, false, MOCK_QSC);
Expand Down Expand Up @@ -229,6 +303,7 @@ public void testLargeNumberIndexingAndQuerying() throws IOException {
true,
false,
true,
true,
Collections.emptyMap(),
scalingFactor,
null
Expand Down
Loading
Loading