Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Star Tree] Scaled Float Support #15442

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.core.xcontent.XContentParser.Token;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.FieldData;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.IndexNumericFieldData;
Expand All @@ -71,10 +72,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;

/** A {@link FieldMapper} for scaled floats. Values are internally multiplied
* by a scaling factor and rounded to the closest long. */
* by a scaling factor and rounded to the closest long.
*/
public class ScaledFloatFieldMapper extends ParametrizedFieldMapper {

public static final String CONTENT_TYPE = "scaled_float";
Expand Down Expand Up @@ -162,11 +165,21 @@
);
return new ScaledFloatFieldMapper(name, type, multiFieldsBuilder.build(this, context), copyTo.build(), this);
}

@Override
sarthakaggarwal97 marked this conversation as resolved.
Show resolved Hide resolved
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
return Optional.of(DimensionType.NUMERIC);
}

@Override
public boolean isDataCubeMetricSupported() {
return true;
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));

public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder, FieldValueConverter {

private final double scalingFactor;
private final Double nullValue;
Expand Down Expand Up @@ -340,6 +353,12 @@
private double scale(Object input) {
return new BigDecimal(Double.toString(parse(input))).multiply(BigDecimal.valueOf(scalingFactor)).doubleValue();
}

@Override
public double toDoubleValue(long value) {
double inverseScalingFactor = 1d / scalingFactor;
return value * inverseScalingFactor;

Check warning on line 360 in modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java

View check run for this annotation

Codecov / codecov/patch

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java#L359-L360

Added lines #L359 - L360 were not covered by tests
}
}

private final Explicit<Boolean> ignoreMalformed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,24 @@

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.plugins.Plugin;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import static java.util.Collections.singletonList;
import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX;
import static org.hamcrest.Matchers.containsString;

public class ScaledFloatFieldMapperTests extends MapperTestCase {
Expand Down Expand Up @@ -91,24 +97,112 @@ public void testExistsQueryDocValuesDisabled() throws IOException {
assertParseMinimalWarnings();
}

public void testDefaults() throws Exception {
XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0));
@BeforeClass
public static void createMapper() {
FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build());
}

@AfterClass
public static void clearMapper() {
FeatureFlags.initializeFeatureFlags(Settings.EMPTY);
}

public void testScaledFloatWithStarTree() throws Exception {

double scalingFactorField1 = randomDouble() * 100;
double scalingFactorField2 = randomDouble() * 100;
double scalingFactorField3 = randomDouble() * 100;

XContentBuilder mapping = getStarTreeMappingWithScaledFloat(scalingFactorField1, scalingFactorField2, scalingFactorField3);
DocumentMapper mapper = createDocumentMapper(mapping);
assertEquals(mapping.toString(), mapper.mappingSource().toString());
assertTrue(mapping.toString().contains("startree"));

ParsedDocument doc = mapper.parse(source(b -> b.field("field", 123)));
IndexableField[] fields = doc.rootDoc().getFields("field");
long randomLongField1 = randomLong();
long randomLongField2 = randomLong();
long randomLongField3 = randomLong();
ParsedDocument doc = mapper.parse(
source(b -> b.field("field1", randomLongField1).field("field2", randomLongField2).field("field3", randomLongField3))
);
validateScaledFloatFields(doc, "field1", randomLongField1, scalingFactorField1);
validateScaledFloatFields(doc, "field2", randomLongField2, scalingFactorField2);
validateScaledFloatFields(doc, "field3", randomLongField3, scalingFactorField3);
}

@Override
protected Settings getIndexSettings() {
return Settings.builder()
.put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true)
.put(super.getIndexSettings())
.build();
}

private static void validateScaledFloatFields(ParsedDocument doc, String field, long value, double scalingFactor) {
IndexableField[] fields = doc.rootDoc().getFields(field);
assertEquals(2, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertFalse(pointField.fieldType().stored());
assertEquals(1230, pointField.numericValue().longValue());
assertEquals((long) (value * scalingFactor), pointField.numericValue().longValue());
IndexableField dvField = fields[1];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
assertEquals(1230, dvField.numericValue().longValue());
assertEquals((long) (value * scalingFactor), dvField.numericValue().longValue());
assertFalse(dvField.fieldType().stored());
}

private XContentBuilder getStarTreeMappingWithScaledFloat(
double scalingFactorField1,
double scalingFactorField2,
double scalingFactorField3
) throws IOException {
return topMapping(b -> {
b.startObject("composite");
b.startObject("startree");
b.field("type", "star_tree");
b.startObject("config");
b.field("max_leaf_docs", 100);
b.startArray("ordered_dimensions");
b.startObject();
b.field("name", "field1");
b.endObject();
b.startObject();
b.field("name", "field2");
b.endObject();
b.endArray();
b.startArray("metrics");
b.startObject();
b.field("name", "field3");
b.startArray("stats");
b.value("sum");
b.value("value_count");
b.endArray();
b.endObject();
b.endArray();
b.endObject();
b.endObject();
b.endObject();
b.startObject("properties");
b.startObject("field1");
b.field("type", "scaled_float").field("scaling_factor", scalingFactorField1);
b.endObject();
b.startObject("field2");
b.field("type", "scaled_float").field("scaling_factor", scalingFactorField2);
b.endObject();
b.startObject("field3");
b.field("type", "scaled_float").field("scaling_factor", scalingFactorField3);
b.endObject();
b.endObject();
});
}

public void testDefaults() throws Exception {
XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0));
DocumentMapper mapper = createDocumentMapper(mapping);
assertEquals(mapping.toString(), mapper.mappingSource().toString());

ParsedDocument doc = mapper.parse(source(b -> b.field("field", 123)));
validateScaledFloatFields(doc, "field", 123, 10.0);
}

public void testMissingScalingFactor() {
Exception e = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.opensearch.action.index.IndexResponse;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.action.support.master.AcknowledgedResponse;
import org.opensearch.common.Rounding;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.common.unit.ByteSizeUnit;
Expand All @@ -23,7 +22,6 @@
import org.opensearch.index.IndexService;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.compositeindex.CompositeIndexSettings;
import org.opensearch.index.compositeindex.datacube.DateDimension;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
Expand Down Expand Up @@ -62,7 +60,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv_1")
.endObject()
.startObject()
.field("name", "numeric_dv_2")
.endObject()
.startObject()
.field("name", getDim(invalidDim, keywordDim))
Expand All @@ -84,6 +85,14 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv_1")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv_2")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric")
.field("type", "integer")
.field("doc_values", false)
Expand Down Expand Up @@ -112,11 +121,7 @@ private static XContentBuilder createMaxDimTestMapping() {
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.startArray("calendar_intervals")
.value("day")
.value("month")
.endArray()
.field("name", "dim4")
.endObject()
.startObject()
.field("name", "dim2")
Expand Down Expand Up @@ -201,7 +206,7 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv1")
.endObject()
.startObject()
.field("name", changeDim ? "numeric_new" : getDim(false, false))
Expand All @@ -223,6 +228,10 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv1")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric")
.field("type", "integer")
.field("doc_values", false)
Expand Down Expand Up @@ -256,7 +265,7 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv2")
.endObject()
.startObject()
.field("name", "numeric_dv")
Expand Down Expand Up @@ -284,6 +293,10 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv2")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv1")
.field("type", "integer")
.field("doc_values", true)
Expand Down Expand Up @@ -328,15 +341,8 @@ public void testValidCompositeIndex() {
for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField());
assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField());
assertEquals(2, starTreeFieldType.getMetrics().size());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -496,15 +502,8 @@ public void testUpdateIndexWhenMappingIsSame() {
for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField());
assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

// Assert default metrics
Expand Down Expand Up @@ -570,24 +569,6 @@ public void testMaxMetricsCompositeIndex() {
);
}

public void testMaxCalendarIntervalsCompositeIndex() {
MapperParsingException ex = expectThrows(
MapperParsingException.class,
() -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping())
.setSettings(
Settings.builder()
.put(StarTreeIndexSettings.STAR_TREE_MAX_DATE_INTERVALS_SETTING.getKey(), 1)
.put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true)
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
)
.get()
);
assertEquals(
"Failed to parse mapping [_doc]: At most [1] calendar intervals are allowed in dimension [timestamp]",
ex.getMessage()
);
}

public void testUnsupportedDim() {
MapperParsingException ex = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -55,11 +53,13 @@ public static Dimension parseAndCreateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder instanceof DateFieldMapper.Builder) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder instanceof NumberFieldMapper.Builder) {
return new NumericDimension(name);
}
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
Expand Down
Loading
Loading