Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial changes to support PointValues with Summary information for timeseries use case #1

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,6 @@
org.apache.lucene.backward_codecs.lucene87.Lucene87Codec,
org.apache.lucene.backward_codecs.lucene90.Lucene90Codec,
org.apache.lucene.backward_codecs.lucene91.Lucene91Codec,
org.apache.lucene.backward_codecs.lucene92.Lucene92Codec;
org.apache.lucene.backward_codecs.lucene92.Lucene92Codec,
org.apache.lucene.backward_codecs.lucene93Ext.Lucene93ExtCodec;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.apache.lucene.backward_codecs.lucene93Ext;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene93.Lucene93Codec;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDWriter;

import java.io.IOException;

/**
 * Codec registered under the name {@code "Lucene93ExtCodec"}. It wraps another codec and replaces
 * only the points format, so that points are written by {@link Lucene93ExtPointsWriter} and read
 * by {@link Lucene93ExtPointsReader}.
 */
public class Lucene93ExtCodec extends FilterCodec {

  /** File extension of the summary index file. */
  public static final String SUMMARY_INDEX_EXTENSION = "kdsi";

  /** Codec name written into / checked against the header of the summary index file. */
  public static final String SUMMARY_INDEX_CODEC_NAME = "Lucene93PointsFormatSummaryIndex";

  /** File extension of the summary data file. */
  public static final String SUMMARY_DATA_EXTENSION = "kdsd";

  /** Codec name written into / checked against the header of the summary data file. */
  public static final String SUMMARY_DATA_CODEC_NAME = "Lucene93PointsFormatSummaryData";

  /** Leaf size handed to every points writer created by this codec. */
  final int maxPointsInLeafNode;

  /** Sort heap budget (in MB) handed to every points writer created by this codec. */
  final double maxMBSortHeap;

  /** Wraps a fresh {@link Lucene93Codec} and uses the BKD default leaf size and sort heap. */
  public Lucene93ExtCodec() {
    this(
        new Lucene93Codec(),
        BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
        BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
  }

  /**
   * Wraps the given codec.
   *
   * @param defaultCodec codec that serves every format other than points
   * @param maxPointsInLeafNode leaf size passed to {@link Lucene93ExtPointsWriter}
   * @param maxMBSortHeap sort heap size passed to {@link Lucene93ExtPointsWriter}
   */
  public Lucene93ExtCodec(Codec defaultCodec, int maxPointsInLeafNode, double maxMBSortHeap) {
    super("Lucene93ExtCodec", defaultCodec);
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxMBSortHeap = maxMBSortHeap;
  }

  /**
   * Builds a new points format on every call.
   *
   * @return a format whose writer is a {@link Lucene93ExtPointsWriter} and whose reader is a
   *     {@link Lucene93ExtPointsReader}
   */
  @Override
  public PointsFormat pointsFormat() {
    // Both fields are final, so capturing them up front is equivalent to reading them lazily.
    final int leafSize = maxPointsInLeafNode;
    final double sortHeapMB = maxMBSortHeap;

    PointsFormat format =
        new PointsFormat() {
          @Override
          public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
            return new Lucene93ExtPointsReader(readState);
          }

          @Override
          public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
            return new Lucene93ExtPointsWriter(writeState, leafSize, sortHeapMB);
          }
        };
    return format;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package org.apache.lucene.backward_codecs.lucene93Ext;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWithSummaryReader;

import java.io.IOException;

/**
 * Points reader that extends {@link Lucene90PointsReader} with two extra per-segment files: a
 * summary index file and a summary data file. The files are opened only when at least one field
 * of the segment reports {@code hasPointsSummary()}; otherwise both inputs stay {@code null} and
 * every summary-specific step is skipped.
 */
public class Lucene93ExtPointsReader extends Lucene90PointsReader {
  /** Inputs for the summary files; {@code null} when the segment has no summary field. */
  IndexInput summaryIndexIn, summaryDataIn;

  // NOTE(review): if the superclass constructor is what invokes readAdditionalMetadata(), these
  // initializers run after it returns and reset both lengths to -1 -- confirm against
  // Lucene90PointsReader before relying on these values after construction.
  long summaryIndexLength = -1, summaryDataLength = -1;

  /**
   * Sole constructor.
   *
   * @param readState state of the segment whose points are read
   * @throws IOException if the superclass constructor fails
   */
  public Lucene93ExtPointsReader(SegmentReadState readState) throws IOException {
    super(readState);
  }

  /**
   * Opens the summary index and summary data inputs when the segment has a summary field.
   *
   * @param readState state of the segment being opened
   * @throws IOException if a summary file cannot be opened or fails header/checksum validation
   */
  @Override
  public void init(SegmentReadState readState) throws IOException {
    summaryIndexIn =
        openSummaryInput(
            readState,
            Lucene93ExtCodec.SUMMARY_INDEX_EXTENSION,
            Lucene93ExtCodec.SUMMARY_INDEX_CODEC_NAME);
    summaryDataIn =
        openSummaryInput(
            readState,
            Lucene93ExtCodec.SUMMARY_DATA_EXTENSION,
            Lucene93ExtCodec.SUMMARY_DATA_CODEC_NAME);
  }

  /** Returns whether any field of the segment reports a points summary. */
  private static boolean hasSummaryField(SegmentReadState readState) {
    for (FieldInfo fieldInfo : readState.fieldInfos) {
      if (fieldInfo.hasPointsSummary()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Opens one summary file and validates its index header and checksum.
   *
   * @param readState state of the segment being opened
   * @param extension file extension of the summary file
   * @param codecName codec name expected in the file header
   * @return the opened input, or {@code null} when no field has a points summary
   * @throws IOException if opening or validating the file fails
   */
  private IndexInput openSummaryInput(
      SegmentReadState readState, String extension, String codecName) throws IOException {
    if (hasSummaryField(readState) == false) {
      return null;
    }
    String fileName =
        IndexFileNames.segmentFileName(
            readState.segmentInfo.name, readState.segmentSuffix, extension);
    IndexInput in = null;
    boolean success = false;
    try {
      in = readState.directory.openInput(fileName, readState.context);
      CodecUtil.checkIndexHeader(
          in,
          codecName,
          Lucene90PointsFormat.VERSION_START,
          Lucene90PointsFormat.VERSION_CURRENT,
          readState.segmentInfo.getId(),
          readState.segmentSuffix);
      CodecUtil.retrieveChecksum(in);
      success = true;
      return in;
    } finally {
      if (success == false) {
        // The input has not been stored in a field yet, so close() on this reader cannot reach
        // it; close it explicitly in addition to everything the reader already holds.
        IOUtils.closeWhileHandlingException(in, this);
      }
    }
  }

  /**
   * Creates the BKD reader for one field.
   *
   * @return a {@link BKDWithSummaryReader} fed with the summary inputs when the field has a points
   *     summary, otherwise whatever the superclass creates
   */
  @Override
  public BKDReader getBKDReader(
      SegmentReadState readState,
      int fieldNumber,
      IndexInput metaIn,
      IndexInput indexIn,
      IndexInput dataIn)
      throws IOException {
    if (readState.fieldInfos.fieldInfo(fieldNumber).hasPointsSummary()) {
      return new BKDWithSummaryReader(metaIn, indexIn, dataIn, summaryIndexIn, summaryDataIn);
    } else {
      return super.getBKDReader(readState, fieldNumber, metaIn, indexIn, dataIn);
    }
  }

  /**
   * Reads the superclass metadata and then, when summary files are open, the two longs holding
   * the summary index length and the summary data length (in that order).
   */
  @Override
  public void readAdditionalMetadata(IndexInput metaIn) throws IOException {
    super.readAdditionalMetadata(metaIn);
    if (summaryIndexIn != null) {
      summaryIndexLength = metaIn.readLong();
      summaryDataLength = metaIn.readLong();
    }
  }

  /** Retrieves the checksums of the summary files, using the lengths read from the metadata. */
  @Override
  public void retrieveAdditionalChecksum() throws IOException {
    super.retrieveAdditionalChecksum();
    if (summaryIndexIn != null) {
      CodecUtil.retrieveChecksum(summaryIndexIn, summaryIndexLength);
    }
    if (summaryDataIn != null) {
      CodecUtil.retrieveChecksum(summaryDataIn, summaryDataLength);
    }
  }

  /** Verifies the superclass files and then the full checksum of each open summary file. */
  @Override
  public void checkIntegrity() throws IOException {
    super.checkIntegrity();
    if (summaryIndexIn != null) {
      CodecUtil.checksumEntireFile(summaryIndexIn);
    }
    if (summaryDataIn != null) {
      CodecUtil.checksumEntireFile(summaryDataIn);
    }
  }

  /** Closes the summary inputs and then the superclass resources. */
  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(summaryIndexIn, summaryDataIn);
    } finally {
      // Must run even when closing a summary input throws, or the superclass inputs would leak.
      super.close();
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package org.apache.lucene.backward_codecs.lucene93Ext;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutableSummaryPointTree;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDSummaryWriter;
import org.apache.lucene.util.bkd.BKDWriter;

import java.io.IOException;

public class Lucene93ExtPointsWriter extends Lucene90PointsWriter {
protected IndexOutput summaryIndex = null, summaryData = null;
SegmentWriteState writeState;

public Lucene93ExtPointsWriter(SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
super(writeState, maxPointsInLeafNode, maxMBSortInHeap);
this.writeState = writeState;
}

public Lucene93ExtPointsWriter(SegmentWriteState writeState) throws IOException {
super(writeState);
}

@Override
public BKDWriter getBKDWriter(SegmentWriteState writeState, PointValues.PointTree values, BKDConfig config, FieldInfo fieldInfo) {
if(values instanceof MutableSummaryPointTree && fieldInfo.hasPointsSummary()) {
return new BKDSummaryWriter(
writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
config,
maxMBSortInHeap,
values.size());
} else {
return super.getBKDWriter(writeState, values, config, fieldInfo);
}
}

@Override
public Runnable writeField(IndexOutput metaOut,
IndexOutput indexOut,
IndexOutput dataOut,
FieldInfo fieldInfo,
PointValues.PointTree pointTree,
BKDWriter writer) throws IOException {
if(pointTree instanceof MutableSummaryPointTree && fieldInfo.hasPointsSummary()) {
initSummaryFiles();
return ((BKDSummaryWriter) writer).writeField(metaOut, indexOut, dataOut, summaryIndex, summaryData,
fieldInfo.getName(), (MutableSummaryPointTree) pointTree, fieldInfo.getMergeFunction());
} else {
return super.writeField(metaOut, indexOut, dataOut, fieldInfo, pointTree, writer);
}
}

private void initSummaryFiles() throws IOException {
if (summaryIndex == null) {
String summaryIndexFileName =
IndexFileNames.segmentFileName(
writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene93ExtCodec.SUMMARY_INDEX_EXTENSION);
summaryIndex = writeState.directory.createOutput(summaryIndexFileName, writeState.context);
CodecUtil.writeIndexHeader(
summaryIndex,
Lucene93ExtCodec.SUMMARY_INDEX_CODEC_NAME,
Lucene90PointsFormat.VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
}

if (summaryData == null) {
String summaryDataFileName =
IndexFileNames.segmentFileName(
writeState.segmentInfo.name,
writeState.segmentSuffix,
Lucene93ExtCodec.SUMMARY_DATA_EXTENSION);
summaryData = writeState.directory.createOutput(summaryDataFileName, writeState.context);
CodecUtil.writeIndexHeader(
summaryData,
Lucene93ExtCodec.SUMMARY_DATA_CODEC_NAME,
Lucene90PointsFormat.VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
}
}

@Override
public Runnable writerFinish(BKDWriter writer, IndexOutput metaOut, IndexOutput indexOut, IndexOutput dataOut)
throws IOException {
if (writer instanceof BKDSummaryWriter) {
throw new UnsupportedOperationException("Merge on BKDTrees is not yet supported yet");
} else {
return super.writerFinish(writer, metaOut, indexOut, dataOut);
}
}

@Override
public void finish() throws IOException {
if (summaryIndex != null) {
CodecUtil.writeFooter(summaryIndex);
}
if (summaryData != null) {
CodecUtil.writeFooter(summaryData);
}
super.finish();
}

public void writeAdditionalMetadata(IndexOutput metaOut) throws IOException {
super.writeAdditionalMetadata(metaOut);
if (summaryIndex != null) {
metaOut.writeLong(summaryIndex.getFilePointer());
}
if (summaryData != null) {
metaOut.writeLong(summaryData.getFilePointer());
}
}

@Override
public void close() throws IOException {
super.close();
if (summaryIndex != null) {
IOUtils.close(summaryIndex);
}
if (summaryData != null) {
IOUtils.close(summaryData);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ org.apache.lucene.backward_codecs.lucene87.Lucene87Codec
org.apache.lucene.backward_codecs.lucene90.Lucene90Codec
org.apache.lucene.backward_codecs.lucene91.Lucene91Codec
org.apache.lucene.backward_codecs.lucene92.Lucene92Codec
org.apache.lucene.backward_codecs.lucene93Ext.Lucene93ExtCodec
Loading