Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Star Tree File Formats #14809

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.index.codec.composite;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat {
private final DocValuesFormat delegate;
private final MapperService mapperService;

/** Data codec name for Composite Doc Values Format */
public static final String DATA_CODEC_NAME = "Composite99FormatData";

/** Meta codec name for Composite Doc Values Format */
public static final String META_CODEC_NAME = "Composite99FormatMeta";

/** Filename extension for the composite index data */
public static final String DATA_EXTENSION = "cid";

/** Filename extension for the composite index meta */
public static final String META_EXTENSION = "cim";

/** Data doc values codec name for Composite Doc Values Format */
public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData";

/** Meta doc values codec name for Composite Doc Values Format */
public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata";

/** Filename extension for the composite index data doc values */
public static final String DATA_DOC_VALUES_EXTENSION = "cidvd";

/** Filename extension for the composite index meta doc values */
public static final String META_DOC_VALUES_EXTENSION = "cidvm";

/** Initial version for the Composite90DocValuesFormat */
public static final int VERSION_START = 0;

/** Current version for the Composite90DocValuesFormat */
public static final int VERSION_CURRENT = VERSION_START;

// needed for SPI
public Composite99DocValuesFormat() {
this(new Lucene90DocValuesFormat(), null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,47 @@

package org.opensearch.index.codec.composite.composite99;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.CompositeIndexValues;
import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory;
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
import org.opensearch.index.compositeindex.datacube.Metric;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER;
import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfoList;

/**
* Reader for star tree index and star tree doc values from the segments
Expand All @@ -32,11 +57,158 @@
*/
@ExperimentalApi
public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader {
private DocValuesProducer delegate;
private static final Logger logger = LogManager.getLogger(Composite99DocValuesReader.class);

private final DocValuesProducer delegate;
private IndexInput dataIn;
private ChecksumIndexInput metaIn;
private final Map<String, IndexInput> compositeIndexInputMap = new LinkedHashMap<>();
private final Map<String, CompositeIndexMetadata> compositeIndexMetadataMap = new LinkedHashMap<>();
private final List<String> fields;
private DocValuesProducer compositeDocValuesProducer;
private final List<CompositeIndexFieldInfo> compositeFieldInfos = new ArrayList<>();
private SegmentReadState readState;

public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException {
public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException {
this.delegate = producer;
// TODO : read star tree files
this.fields = new ArrayList<>();

String metaFileName = IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Composite99DocValuesFormat.META_EXTENSION
);

String dataFileName = IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Composite99DocValuesFormat.DATA_EXTENSION
);

boolean success = false;
try {

// initialize meta input
dataIn = readState.directory.openInput(dataFileName, readState.context);
CodecUtil.checkIndexHeader(
dataIn,
Composite99DocValuesFormat.DATA_CODEC_NAME,
Composite99DocValuesFormat.VERSION_START,
Composite99DocValuesFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix
);

// initialize data input
metaIn = readState.directory.openChecksumInput(metaFileName, readState.context);
Throwable priorE = null;
try {
CodecUtil.checkIndexHeader(
metaIn,
Composite99DocValuesFormat.META_CODEC_NAME,
Composite99DocValuesFormat.VERSION_START,
Composite99DocValuesFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix
);

while (true) {

// validate magic marker
long magicMarker = metaIn.readLong();
if (magicMarker == -1) {
break;
} else if (magicMarker < 0) {
throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn);
} else if (COMPOSITE_FIELD_MARKER != magicMarker) {
logger.error("Invalid composite field magic marker");
throw new IOException("Invalid composite field magic marker");
}

int version = metaIn.readVInt();
if (VERSION_CURRENT != version) {
logger.error("Invalid composite field version");
throw new IOException("Invalid composite field version");
}

// construct composite index metadata
String compositeFieldName = metaIn.readString();
CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName(
metaIn.readString()
);

switch (compositeFieldType) {
case STAR_TREE:
StarTreeMetadata starTreeMetadata = new StarTreeMetadata(
metaIn,
compositeFieldName,
compositeFieldType,
version
);
compositeFieldInfos.add(new CompositeIndexFieldInfo(compositeFieldName, compositeFieldType));

IndexInput starTreeIndexInput = dataIn.slice(
"star-tree data slice for respective star-tree fields",
starTreeMetadata.getDataStartFilePointer(),
starTreeMetadata.getDataLength()
);
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
}

// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getMetrics()) {
fields.add(
fullyQualifiedFieldNameForStarTreeMetricsDocValues(
compositeFieldName,
metric.getField(),
metricStat.getTypeName()
)
);

}
}

break;
default:
throw new CorruptIndexException("Invalid composite field type found in the file", dataIn);
}
}

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);

// initialize star-tree doc values producer

compositeDocValuesProducer = LuceneDocValuesProducerFactory.getDocValuesProducerForCompositeCodec(
Composite99Codec.COMPOSITE_INDEX_CODEC_NAME,
this.readState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION,
Composite99DocValuesFormat.META_DOC_VALUES_CODEC,
Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION
);

} catch (Throwable t) {
priorE = t;
} finally {
CodecUtil.checkFooter(metaIn, priorE);
}
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this);
}
}
}

@Override
Expand Down Expand Up @@ -67,24 +239,63 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
@Override
public void checkIntegrity() throws IOException {
delegate.checkIntegrity();
// Todo : check integrity of composite index related [star tree] files
CodecUtil.checksumEntireFile(dataIn);
}

@Override
public void close() throws IOException {
delegate.close();
// Todo: close composite index related files [star tree] files
boolean success = false;
try {
IOUtils.close(metaIn, dataIn);
IOUtils.close(compositeDocValuesProducer);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(metaIn, dataIn);
}
compositeIndexInputMap.clear();
compositeIndexMetadataMap.clear();
fields.clear();
metaIn = null;
dataIn = null;
}
}

@Override
public List<CompositeIndexFieldInfo> getCompositeIndexFields() {
// todo : read from file formats and get the field names.
return new ArrayList<>();
return compositeFieldInfos;
}

@Override
public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException {
// TODO : read compositeIndexValues [starTreeValues] from star tree files
throw new UnsupportedOperationException();

switch (compositeIndexFieldInfo.getType()) {
case STAR_TREE:
return new StarTreeValues(
compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()),
compositeIndexInputMap.get(compositeIndexFieldInfo.getField()),
compositeDocValuesProducer,
this.readState
);

default:
throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName());
}

}

/**
* Returns the sorted numeric doc values for the given sorted numeric field.
* If the sorted numeric field is null, it returns an empty doc id set iterator.
* <p>
* Sorted numeric field can be null for cases where the segment doesn't hold a particular value.
*
* @param sortedNumeric the sorted numeric doc values for a field
* @return empty sorted numeric values if the field is not present, else sortedNumeric
*/
public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) {
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
}

}
Loading
Loading