Skip to content

Commit

Permalink
star tree file formats
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
  • Loading branch information
sarthakaggarwal97 committed Jul 18, 2024
1 parent 58f34ae commit 9250579
Show file tree
Hide file tree
Showing 42 changed files with 2,649 additions and 211 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;

import java.io.IOException;

/**
 * A {@link DocValuesConsumer} for the Star Tree index structure.
 * It consumes the various types of document values (numeric, binary, sorted, sorted numeric,
 * and sorted set) for fields in the Star Tree index by forwarding every call to an underlying
 * {@link Lucene90DocValuesConsumer}.
 *
 * @opensearch.experimental
 */
public class Composite99DocValuesConsumer extends DocValuesConsumer {

    /** Delegate that every method of this class forwards to. */
    final Lucene90DocValuesConsumer lucene90DocValuesConsumer;

    /**
     * Creates the consumer and its underlying {@link Lucene90DocValuesConsumer}.
     * All arguments are forwarded unchanged to the delegate's constructor.
     *
     * @param state         the segment write state
     * @param dataCodec     the data codec name handed to the delegate
     * @param dataExtension the data file extension handed to the delegate
     * @param metaCodec     the meta codec name handed to the delegate
     * @param metaExtension the meta file extension handed to the delegate
     * @throws IOException if the delegate cannot be created
     */
    public Composite99DocValuesConsumer(
        SegmentWriteState state,
        String dataCodec,
        String dataExtension,
        String metaCodec,
        String metaExtension
    ) throws IOException {
        lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension);
    }

    /**
     * Closes the underlying delegate.
     *
     * @throws IOException if the delegate fails to close
     */
    @Override
    public void close() throws IOException {
        lucene90DocValuesConsumer.close();
    }

    /**
     * Forwards a numeric doc values field to the delegate.
     *
     * @param fieldInfo         the field being written
     * @param docValuesProducer the producer supplying the field's values
     * @throws IOException if the delegate fails to write the field
     */
    @Override
    public void addNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addNumericField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a binary doc values field to the delegate.
     *
     * @param fieldInfo         the field being written
     * @param docValuesProducer the producer supplying the field's values
     * @throws IOException if the delegate fails to write the field
     */
    @Override
    public void addBinaryField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        // Must hit the binary path of the delegate; routing this through addNumericField
        // would write the field with the wrong doc values type.
        lucene90DocValuesConsumer.addBinaryField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a sorted doc values field to the delegate.
     *
     * @param fieldInfo         the field being written
     * @param docValuesProducer the producer supplying the field's values
     * @throws IOException if the delegate fails to write the field
     */
    @Override
    public void addSortedField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addSortedField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a sorted numeric doc values field to the delegate.
     *
     * @param fieldInfo         the field being written
     * @param docValuesProducer the producer supplying the field's values
     * @throws IOException if the delegate fails to write the field
     */
    @Override
    public void addSortedNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addSortedNumericField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a sorted set doc values field to the delegate.
     *
     * @param fieldInfo         the field being written
     * @param docValuesProducer the producer supplying the field's values
     * @throws IOException if the delegate fails to write the field
     */
    @Override
    public void addSortedSetField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addSortedSetField(fieldInfo, docValuesProducer);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry;

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeHelper.fullFieldNameForStarTreeDimensionsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeHelper.fullFieldNameForStarTreeMetricsDocValues;

/**
 * A custom {@link DocValuesProducer} for the Star Tree index structure.
 * It provides access to the various types of document values (numeric, binary, sorted, sorted numeric,
 * and sorted set) for fields in the Star Tree index by forwarding every call to an underlying
 * {@link Lucene90DocValuesProducer}, and additionally allows sorted numeric doc values to be looked up
 * by field name.
 *
 * @opensearch.experimental
 */
public class StarTree99DocValuesProducer extends DocValuesProducer {

    /** Delegate that every read method of this class forwards to. */
    final Lucene90DocValuesProducer lucene90DocValuesProducer;
    private final List<FieldInfo> dimensions;
    private final List<MetricEntry> metrics;
    /** Placeholder field infos, one per dimension and metric, used for name-based lookups. */
    private final FieldInfos fieldInfos;

    /**
     * Creates the producer and its underlying {@link Lucene90DocValuesProducer}.
     * <p>
     * The delegate is opened against a copy of {@code state} whose field infos are replaced by
     * placeholder entries built from {@code dimensions} and {@code metricEntries}.
     *
     * @param state              the segment read state; its directory, segment info and context are reused
     * @param dataCodec          the data codec name handed to the delegate
     * @param dataExtension      the data file extension handed to the delegate
     * @param metaCodec          the meta codec name handed to the delegate
     * @param metaExtension      the meta file extension handed to the delegate
     * @param dimensions         the star tree dimension fields
     * @param metricEntries      the star tree metric entries
     * @param compositeFieldName the composite field name used to derive the doc values field names
     * @throws IOException if the delegate cannot be created
     */
    public StarTree99DocValuesProducer(
        SegmentReadState state,
        String dataCodec,
        String dataExtension,
        String metaCodec,
        String metaExtension,
        List<FieldInfo> dimensions,
        List<MetricEntry> metricEntries,
        String compositeFieldName
    ) throws IOException {
        this.dimensions = dimensions;
        this.metrics = metricEntries;

        // Build placeholder field infos so that doc values can later be fetched by field name.
        this.fieldInfos = new FieldInfos(getFieldInfoList(compositeFieldName));
        SegmentReadState segmentReadState = new SegmentReadState(state.directory, state.segmentInfo, fieldInfos, state.context);
        lucene90DocValuesProducer = new Lucene90DocValuesProducer(segmentReadState, dataCodec, dataExtension, metaCodec, metaExtension);
    }

    @Override
    public NumericDocValues getNumeric(FieldInfo field) throws IOException {
        return this.lucene90DocValuesProducer.getNumeric(field);
    }

    @Override
    public BinaryDocValues getBinary(FieldInfo field) throws IOException {
        return this.lucene90DocValuesProducer.getBinary(field);
    }

    @Override
    public SortedDocValues getSorted(FieldInfo field) throws IOException {
        return this.lucene90DocValuesProducer.getSorted(field);
    }

    @Override
    public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
        return this.lucene90DocValuesProducer.getSortedNumeric(field);
    }

    @Override
    public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
        return this.lucene90DocValuesProducer.getSortedSet(field);
    }

    @Override
    public void checkIntegrity() throws IOException {
        this.lucene90DocValuesProducer.checkIntegrity();
    }

    /**
     * Returns the sorted numeric doc values for the field with the given name.
     *
     * @param fieldName the full doc values field name of a star tree dimension or metric
     * @return the sorted numeric doc values supplied by the delegate for that field
     * @throws IllegalArgumentException if no dimension or metric field with that name is known to this producer
     * @throws IOException              if the delegate fails to read the values
     */
    public SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException {
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
        if (fieldInfo == null) {
            // Fail with a descriptive message instead of handing a null FieldInfo to the delegate.
            throw new IllegalArgumentException("No star tree doc values field found with name [" + fieldName + "]");
        }
        return this.lucene90DocValuesProducer.getSortedNumeric(fieldInfo);
    }

    @Override
    public void close() throws IOException {
        this.lucene90DocValuesProducer.close();
    }

    /**
     * Builds one placeholder {@link FieldInfo} per dimension followed by one per metric.
     *
     * @param compositeFieldName the composite field name used to derive each doc values field name
     * @return the placeholder field infos, dimensions first, then metrics
     */
    private FieldInfo[] getFieldInfoList(String compositeFieldName) {
        FieldInfo[] fieldInfoList = new FieldInfo[this.dimensions.size() + this.metrics.size()];

        // field number is not really used. We depend on unique field names to get the desired iterator
        int fieldNumber = 0;

        for (FieldInfo dimension : this.dimensions) {
            fieldInfoList[fieldNumber] = newSortedNumericFieldInfo(
                fullFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimension.getName()),
                fieldNumber
            );
            fieldNumber++;
        }
        for (MetricEntry metric : this.metrics) {
            fieldInfoList[fieldNumber] = newSortedNumericFieldInfo(
                fullFieldNameForStarTreeMetricsDocValues(compositeFieldName, metric.getMetricName(), metric.getMetricStat().getTypeName()),
                fieldNumber
            );
            fieldNumber++;
        }

        return fieldInfoList;
    }

    /**
     * Creates a placeholder {@link FieldInfo} of type {@link DocValuesType#SORTED_NUMERIC}.
     * Apart from the name and field number, every argument is a fixed placeholder value.
     *
     * @param name        the full doc values field name
     * @param fieldNumber the field number to assign
     * @return the placeholder field info
     */
    private static FieldInfo newSortedNumericFieldInfo(String name, int fieldNumber) {
        return new FieldInfo(
            name,
            fieldNumber,
            false,
            false,
            true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
            DocValuesType.SORTED_NUMERIC,
            -1,
            Collections.emptyMap(),
            0,
            0,
            0,
            0,
            VectorEncoding.FLOAT32,
            VectorSimilarityFunction.EUCLIDEAN,
            false,
            false
        );
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.Counter;

/**
 * Thin public wrapper around a {@link SortedNumericDocValuesWriter}.
 * <p>
 * It lets callers append sorted numeric doc values for a field and then obtain the
 * resulting {@link SortedNumericDocValues} from the wrapped writer.
 *
 * @opensearch.experimental
 */
public class SortedNumericDocValuesWriterHelper {

    /** The wrapped writer that receives every call. */
    private final SortedNumericDocValuesWriter writer;

    /**
     * Sole constructor. Creates the wrapped {@link SortedNumericDocValuesWriter}.
     *
     * @param fieldInfo the field information for the field being written
     * @param counter   a counter for tracking memory usage
     */
    public SortedNumericDocValuesWriterHelper(FieldInfo fieldInfo, Counter counter) {
        this.writer = new SortedNumericDocValuesWriter(fieldInfo, counter);
    }

    /**
     * Appends a value for the given document to the wrapped writer.
     *
     * @param docID the document ID
     * @param value the value to add
     */
    public void addValue(int docID, long value) {
        this.writer.addValue(docID, value);
    }

    /**
     * Fetches the doc values accumulated by the wrapped writer.
     *
     * @return the {@link SortedNumericDocValues} instance obtained from the wrapped writer
     */
    public SortedNumericDocValues getDocValues() {
        final SortedNumericDocValues docValues = this.writer.getDocValues();
        return docValues;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat {
private final DocValuesFormat delegate;
private final MapperService mapperService;

/** Data codec name for Composite Doc Values Format */
public static final String DATA_CODEC_NAME = "Composite99FormatData";

/** Meta codec name for Composite Doc Values Format */
public static final String META_CODEC_NAME = "Composite99FormatMeta";

/** Filename extension for the composite index data */
public static final String DATA_EXTENSION = "cid";

/** Filename extension for the composite index meta */
public static final String META_EXTENSION = "cim";

/** Data doc values codec name for Composite Doc Values Format */
public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData";

/** Meta doc values codec name for Composite Doc Values Format */
public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata";

/** Filename extension for the composite index data doc values */
public static final String DATA_DOC_VALUES_EXTENSION = "cidvd";

/** Filename extension for the composite index meta doc values */
public static final String META_DOC_VALUES_EXTENSION = "cidvm";

/** Initial version for the Composite99DocValuesFormat */
public static final int VERSION_START = 0;

/** Current version for the Composite99DocValuesFormat */
public static final int VERSION_CURRENT = VERSION_START;

// needed for SPI
public Composite99DocValuesFormat() {
this(new Lucene90DocValuesFormat(), null);
Expand Down
Loading

0 comments on commit 9250579

Please sign in to comment.