Skip to content

Commit

Permalink
[Star Tree] Scaled Float Support (opensearch-project#15442)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
  • Loading branch information
sarthakaggarwal97 authored Sep 4, 2024
1 parent 48cf5f0 commit 23f5c2f
Show file tree
Hide file tree
Showing 43 changed files with 575 additions and 483 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.core.xcontent.XContentParser.Token;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.FieldData;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.IndexNumericFieldData;
Expand All @@ -71,10 +72,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;

/**
 * A {@link FieldMapper} for scaled floats. Values are internally multiplied
 * by a scaling factor and rounded to the closest long.
 */
public class ScaledFloatFieldMapper extends ParametrizedFieldMapper {

public static final String CONTENT_TYPE = "scaled_float";
Expand Down Expand Up @@ -162,11 +165,21 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
);
return new ScaledFloatFieldMapper(name, type, multiFieldsBuilder.build(this, context), copyTo.build(), this);
}

/**
 * Reports the data cube dimension type that fields created by this builder can be used as.
 *
 * @return an {@link Optional} containing {@link DimensionType#NUMERIC}
 */
@Override
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
    final DimensionType supportedType = DimensionType.NUMERIC;
    return Optional.of(supportedType);
}

/**
 * Reports whether fields created by this builder can be used as a data cube metric.
 *
 * @return always {@code true}
 */
@Override
public boolean isDataCubeMetricSupported() {
return true;
}
}

/**
 * Type parser for this mapper: creates a {@link Builder} from the first lambda argument and the
 * settings exposed by the second one.
 */
public static final TypeParser PARSER = new TypeParser((fieldName, parserContext) -> new Builder(fieldName, parserContext.getSettings()));

public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder, FieldValueConverter {

private final double scalingFactor;
private final Double nullValue;
Expand Down Expand Up @@ -340,6 +353,12 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
/**
 * Multiplies the parsed input by this field type's scaling factor.
 *
 * Both operands are converted to {@link BigDecimal} through their decimal string form
 * ({@code Double.toString} / {@code BigDecimal.valueOf}) before multiplying, and the product
 * is then converted back to a {@code double}.
 *
 * @param input the raw value, handed to {@code parse(Object)} first
 * @return the scaled value as a double
 */
private double scale(Object input) {
    final BigDecimal parsedValue = new BigDecimal(Double.toString(parse(input)));
    final BigDecimal factor = BigDecimal.valueOf(scalingFactor);
    return parsedValue.multiply(factor).doubleValue();
}

/**
 * Converts a long back into a double by multiplying it with the reciprocal of the scaling factor.
 *
 * @param value the long value to convert
 * @return {@code value * (1 / scalingFactor)}
 */
@Override
public double toDoubleValue(long value) {
    // Deliberately multiply by the reciprocal instead of dividing: the two forms can differ
    // in the last bit, and this keeps the exact arithmetic of the original implementation.
    return value * (1d / scalingFactor);
}
}

private final Explicit<Boolean> ignoreMalformed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,24 @@

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.plugins.Plugin;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import static java.util.Collections.singletonList;
import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX;
import static org.hamcrest.Matchers.containsString;

public class ScaledFloatFieldMapperTests extends MapperTestCase {
Expand Down Expand Up @@ -91,24 +97,112 @@ public void testExistsQueryDocValuesDisabled() throws IOException {
assertParseMinimalWarnings();
}

public void testDefaults() throws Exception {
XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0));
/**
 * Runs once before the tests of this class and initializes the feature flags with
 * {@code STAR_TREE_INDEX} set to {@code "true"}.
 */
@BeforeClass
public static void createMapper() {
    final Settings starTreeEnabled = Settings.builder().put(STAR_TREE_INDEX, "true").build();
    FeatureFlags.initializeFeatureFlags(starTreeEnabled);
}

/**
 * Runs once after the tests of this class and re-initializes the feature flags from empty settings.
 */
@AfterClass
public static void clearMapper() {
    final Settings noOverrides = Settings.EMPTY;
    FeatureFlags.initializeFeatureFlags(noOverrides);
}

/**
 * Builds a mapping in which three {@code scaled_float} fields (each with a random scaling
 * factor) back a star tree: {@code field1}/{@code field2} as ordered dimensions and
 * {@code field3} as a metric. Then parses one document with random long values and checks
 * the indexed fields of each scaled float.
 */
public void testScaledFloatWithStarTree() throws Exception {
    double scalingFactorField1 = randomDouble() * 100;
    double scalingFactorField2 = randomDouble() * 100;
    double scalingFactorField3 = randomDouble() * 100;

    XContentBuilder mapping = getStarTreeMappingWithScaledFloat(scalingFactorField1, scalingFactorField2, scalingFactorField3);
    DocumentMapper mapper = createDocumentMapper(mapping);
    // The mapper must round-trip the mapping unchanged, star tree section included.
    assertEquals(mapping.toString(), mapper.mappingSource().toString());
    assertTrue(mapping.toString().contains("startree"));

    // Only field1..field3 exist in this mapping, so the document is parsed exactly once with
    // those fields (no separate parse of an unmapped "field").
    long randomLongField1 = randomLong();
    long randomLongField2 = randomLong();
    long randomLongField3 = randomLong();
    ParsedDocument doc = mapper.parse(
        source(b -> b.field("field1", randomLongField1).field("field2", randomLongField2).field("field3", randomLongField3))
    );
    validateScaledFloatFields(doc, "field1", randomLongField1, scalingFactorField1);
    validateScaledFloatFields(doc, "field2", randomLongField2, scalingFactorField2);
    validateScaledFloatFields(doc, "field3", randomLongField3, scalingFactorField3);
}

/**
 * Index settings used by this test class: the composite index setting switched on, followed by
 * whatever the parent test case supplies.
 *
 * @return the combined settings
 */
@Override
protected Settings getIndexSettings() {
    Settings.Builder combined = Settings.builder();
    combined.put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true);
    // Parent settings are applied second, in the same order as before.
    combined.put(super.getIndexSettings());
    return combined.build();
}

/**
 * Asserts that {@code field} was indexed as exactly two Lucene fields, a one-dimensional
 * point followed by a {@code SORTED_NUMERIC} doc value, neither of them stored, and that both
 * carry the scaled representation of {@code value}.
 *
 * @param doc           the parsed document to inspect
 * @param field         name of the scaled float field
 * @param value         the raw value that was put into the source document
 * @param scalingFactor the scaling factor configured for {@code field}
 */
private static void validateScaledFloatFields(ParsedDocument doc, String field, long value, double scalingFactor) {
    // Scaled floats are multiplied by the scaling factor and rounded to the closest long, so
    // the expectation is derived from the arguments with Math.round rather than a truncating
    // cast or a hard-coded constant.
    final long expectedScaledValue = Math.round(value * scalingFactor);

    IndexableField[] fields = doc.rootDoc().getFields(field);
    assertEquals(2, fields.length);

    IndexableField pointField = fields[0];
    assertEquals(1, pointField.fieldType().pointDimensionCount());
    assertFalse(pointField.fieldType().stored());
    assertEquals(expectedScaledValue, pointField.numericValue().longValue());

    IndexableField dvField = fields[1];
    assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
    assertEquals(expectedScaledValue, dvField.numericValue().longValue());
    assertFalse(dvField.fieldType().stored());
}

/**
 * Builds a top-level mapping containing a {@code composite.startree} definition of type
 * {@code star_tree} ({@code max_leaf_docs} 100, ordered dimensions {@code field1} and
 * {@code field2}, metric {@code field3} with stats {@code sum} and {@code value_count}) plus
 * three {@code scaled_float} properties using the given scaling factors.
 *
 * @param scalingFactorField1 scaling factor of {@code field1}
 * @param scalingFactorField2 scaling factor of {@code field2}
 * @param scalingFactorField3 scaling factor of {@code field3}
 * @return the mapping builder
 * @throws IOException if writing the mapping content fails
 */
private XContentBuilder getStarTreeMappingWithScaledFloat(
    double scalingFactorField1,
    double scalingFactorField2,
    double scalingFactorField3
) throws IOException {
    return topMapping(builder -> {
        // composite -> startree -> config
        builder.startObject("composite")
            .startObject("startree")
            .field("type", "star_tree")
            .startObject("config")
            .field("max_leaf_docs", 100);

        builder.startArray("ordered_dimensions");
        for (String dimension : new String[] { "field1", "field2" }) {
            builder.startObject().field("name", dimension).endObject();
        }
        builder.endArray();

        builder.startArray("metrics")
            .startObject()
            .field("name", "field3")
            .startArray("stats")
            .value("sum")
            .value("value_count")
            .endArray()
            .endObject()
            .endArray();

        // Close config, startree and composite, in that order.
        builder.endObject().endObject().endObject();

        // Field definitions, emitted in the order field1, field2, field3.
        String[] fieldNames = { "field1", "field2", "field3" };
        double[] scalingFactors = { scalingFactorField1, scalingFactorField2, scalingFactorField3 };
        builder.startObject("properties");
        for (int i = 0; i < fieldNames.length; i++) {
            builder.startObject(fieldNames[i]).field("type", "scaled_float").field("scaling_factor", scalingFactors[i]).endObject();
        }
        builder.endObject();
    });
}

/**
 * Maps a single {@code scaled_float} field with scaling factor 10.0, checks that the mapping
 * round-trips through the mapper, and validates the fields indexed for the value 123.
 */
public void testDefaults() throws Exception {
    final double scalingFactor = 10.0;
    final int rawValue = 123;

    XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", scalingFactor));
    DocumentMapper mapper = createDocumentMapper(mapping);
    assertEquals(mapping.toString(), mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse(source(b -> b.field("field", rawValue)));
    validateScaledFloatFields(doc, "field", rawValue, scalingFactor);
}

public void testMissingScalingFactor() {
Exception e = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.opensearch.action.index.IndexResponse;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.action.support.master.AcknowledgedResponse;
import org.opensearch.common.Rounding;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.common.unit.ByteSizeUnit;
Expand All @@ -23,7 +22,6 @@
import org.opensearch.index.IndexService;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.compositeindex.CompositeIndexSettings;
import org.opensearch.index.compositeindex.datacube.DateDimension;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
Expand Down Expand Up @@ -62,7 +60,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv_1")
.endObject()
.startObject()
.field("name", "numeric_dv_2")
.endObject()
.startObject()
.field("name", getDim(invalidDim, keywordDim))
Expand All @@ -84,6 +85,14 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv_1")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv_2")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric")
.field("type", "integer")
.field("doc_values", false)
Expand Down Expand Up @@ -112,11 +121,7 @@ private static XContentBuilder createMaxDimTestMapping() {
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.startArray("calendar_intervals")
.value("day")
.value("month")
.endArray()
.field("name", "dim4")
.endObject()
.startObject()
.field("name", "dim2")
Expand Down Expand Up @@ -201,7 +206,7 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv1")
.endObject()
.startObject()
.field("name", changeDim ? "numeric_new" : getDim(false, false))
Expand All @@ -223,6 +228,10 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv1")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric")
.field("type", "integer")
.field("doc_values", false)
Expand Down Expand Up @@ -256,7 +265,7 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.field("name", "timestamp")
.field("name", "numeric_dv2")
.endObject()
.startObject()
.field("name", "numeric_dv")
Expand Down Expand Up @@ -284,6 +293,10 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv2")
.field("type", "integer")
.field("doc_values", true)
.endObject()
.startObject("numeric_dv1")
.field("type", "integer")
.field("doc_values", true)
Expand Down Expand Up @@ -328,15 +341,8 @@ public void testValidCompositeIndex() {
for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField());
assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField());
assertEquals(2, starTreeFieldType.getMetrics().size());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -496,15 +502,8 @@ public void testUpdateIndexWhenMappingIsSame() {
for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField());
assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

// Assert default metrics
Expand Down Expand Up @@ -570,24 +569,6 @@ public void testMaxMetricsCompositeIndex() {
);
}

/**
 * Creating the index with the max-dimension test mapping while the maximum number of date
 * intervals is limited to 1 must fail with a {@link MapperParsingException} carrying the
 * expected message.
 */
public void testMaxCalendarIntervalsCompositeIndex() {
    Settings.Builder indexSettings = Settings.builder()
        .put(StarTreeIndexSettings.STAR_TREE_MAX_DATE_INTERVALS_SETTING.getKey(), 1)
        .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true)
        .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB));

    MapperParsingException ex = expectThrows(
        MapperParsingException.class,
        () -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping()).setSettings(indexSettings).get()
    );

    String expectedMessage = "Failed to parse mapping [_doc]: At most [1] calendar intervals are allowed in dimension [timestamp]";
    assertEquals(expectedMessage, ex.getMessage());
}

public void testUnsupportedDim() {
MapperParsingException ex = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -55,11 +53,13 @@ public static Dimension parseAndCreateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder instanceof DateFieldMapper.Builder) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder instanceof NumberFieldMapper.Builder) {
return new NumericDimension(name);
}
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
Expand Down
Loading

0 comments on commit 23f5c2f

Please sign in to comment.