Skip to content

Commit

Permalink
Resolve merge conflicts
Browse files Browse the repository at this point in the history
Moved FST metadata saving into FSTMetadata class per suggestion from
@dungba88.
  • Loading branch information
msfroh committed Feb 26, 2024
1 parent f8c1f0e commit 49b622f
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ public SynonymMap build(SynonymMapDirectory directory) throws IOException {
fstCompiler.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());
}

FST<BytesRef> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
FST.FSTMetadata<BytesRef> fstMetaData = fstCompiler.compile();
if (directory != null) {
fstOutput.close(); // TODO -- Should fstCompiler.compile take care of this?
try (SynonymMapDirectory.WordsOutput wordsOutput = directory.wordsOutput()) {
Expand All @@ -316,9 +316,10 @@ public SynonymMap build(SynonymMapDirectory directory) throws IOException {
wordsOutput.addWord(scratchRef);
}
}
directory.writeMetadata(words.size(), maxHorizontalContext, fst);
directory.writeMetadata(words.size(), maxHorizontalContext, fstMetaData);
return directory.readMap();
}
FST<BytesRef> fst = FST.fromFSTReader(fstMetaData, fstCompiler.getFSTReader());
BytesRefHashLike wordsLike =
new BytesRefHashLike() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ public WordsOutput wordsOutput() throws IOException {
return synonymMapFormat.getWordsOutput(directory);
}

public void writeMetadata(int wordCount, int maxHorizontalContext, FST<BytesRef> fst)
public void writeMetadata(
int wordCount, int maxHorizontalContext, FST.FSTMetadata<BytesRef> fstMetadata)
throws IOException {
synonymMapFormat.writeMetadata(directory, wordCount, maxHorizontalContext, fst);
synonymMapFormat.writeMetadata(directory, wordCount, maxHorizontalContext, fstMetadata);
}

public SynonymMap readMap() throws IOException {
Expand Down Expand Up @@ -107,12 +108,15 @@ public void addWord(BytesRef word) throws IOException {
;

public void writeMetadata(
Directory directory, int wordCount, int maxHorizontalContext, FST<BytesRef> fst)
Directory directory,
int wordCount,
int maxHorizontalContext,
FST.FSTMetadata<BytesRef> fstMetadata)
throws IOException {
try (IndexOutput metadataOutput = directory.createOutput(METADATA_FILE, IOContext.DEFAULT)) {
metadataOutput.writeVInt(wordCount);
metadataOutput.writeVInt(maxHorizontalContext);
fst.saveMetadata(metadataOutput);
fstMetadata.save(metadataOutput);
}
directory.sync(List.of(FST_FILE, WORDS_FILE, METADATA_FILE));
}
Expand Down
89 changes: 42 additions & 47 deletions lucene/core/src/java/org/apache/lucene/util/fst/FST.java
Original file line number Diff line number Diff line change
Expand Up @@ -537,56 +537,10 @@ public FSTMetadata<T> getMetadata() {
* @param out the DataOutput to write the FST bytes to
*/
public void save(DataOutput metaOut, DataOutput out) throws IOException {
saveMetadata(metaOut);
metadata.save(metaOut);
fstReader.writeTo(out);
}

/**
 * Save the metadata to a DataOutput
 *
 * <p>Written in order: the codec header ({@code FILE_FORMAT_NAME}, {@code VERSION_CURRENT}); a
 * flag byte (1 if {@code metadata.emptyOutput} is non-null, else 0), followed when set by the VInt
 * length and the byte-reversed serialized final output; one byte for the input type (0 for BYTE1,
 * 1 for BYTE2, 2 for anything else); {@code metadata.startNode} as a VLong; and {@code
 * numBytes()} as a VLong.
 *
 * @param metaOut the DataOutput to write the metadata to
 * @throws IOException propagated from the writes to {@code metaOut}
 */
public void saveMetadata(DataOutput metaOut) throws IOException {
CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (metadata.emptyOutput != null) {
// Accepts empty string
metaOut.writeByte((byte) 1);

// Serialize empty-string output:
// (into a temporary buffer first, so its length is known and its bytes can be reversed)
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
outputs.writeFinalOutput(metadata.emptyOutput, ros);
byte[] emptyOutputBytes = ros.toArrayCopy();
int emptyLen = emptyOutputBytes.length;

// reverse
// In-place swap of mirrored positions; the middle byte of an odd-length array stays put.
// NOTE(review): presumably the loader reads these bytes back-to-front -- confirm against the
// code that reads the metadata.
final int stopAt = emptyLen / 2;
int upto = 0;
while (upto < stopAt) {
final byte b = emptyOutputBytes[upto];
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
emptyOutputBytes[emptyLen - upto - 1] = b;
upto++;
}
metaOut.writeVInt(emptyLen);
metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
} else {
// Does not accept empty string: flag byte only, no payload
metaOut.writeByte((byte) 0);
}
// Encode the input type as a single byte
final byte t;
if (metadata.inputType == INPUT_TYPE.BYTE1) {
t = 0;
} else if (metadata.inputType == INPUT_TYPE.BYTE2) {
t = 1;
} else {
// Any remaining input type. NOTE(review): presumably BYTE4 -- confirm against INPUT_TYPE.
t = 2;
}
metaOut.writeByte(t);
metaOut.writeVLong(metadata.startNode);
metaOut.writeVLong(numBytes());
}

/** Writes an automaton to a file. */
public void save(final Path path) throws IOException {
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
Expand Down Expand Up @@ -1258,5 +1212,46 @@ public FSTMetadata(
public int getVersion() {
return version;
}

/**
 * Save the metadata to a DataOutput
 *
 * <p>Written in order: the codec header ({@code FILE_FORMAT_NAME}, {@code VERSION_CURRENT}); a
 * flag byte (1 if {@code emptyOutput} is non-null, else 0), followed when set by the VInt length
 * and the byte-reversed serialized final output; one byte for the input type (0 for BYTE1, 1 for
 * BYTE2, 2 for anything else); {@code startNode} as a VLong; and {@code numBytes} as a VLong.
 *
 * @param metaOut the DataOutput to write the metadata to
 * @throws IOException propagated from the writes to {@code metaOut}
 */
public void save(DataOutput metaOut) throws IOException {
CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (emptyOutput != null) {
// Accepts empty string
metaOut.writeByte((byte) 1);

// Serialize empty-string output:
// (into a temporary buffer first, so its length is known and its bytes can be reversed)
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
outputs.writeFinalOutput(emptyOutput, ros);
byte[] emptyOutputBytes = ros.toArrayCopy();
int emptyLen = emptyOutputBytes.length;

// reverse
// In-place swap of mirrored positions; the middle byte of an odd-length array stays put.
// NOTE(review): presumably the loader reads these bytes back-to-front -- confirm against the
// code that reads the metadata.
final int stopAt = emptyLen / 2;
int upto = 0;
while (upto < stopAt) {
final byte b = emptyOutputBytes[upto];
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
emptyOutputBytes[emptyLen - upto - 1] = b;
upto++;
}
metaOut.writeVInt(emptyLen);
metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
} else {
// Does not accept empty string: flag byte only, no payload
metaOut.writeByte((byte) 0);
}
// Encode the input type as a single byte
final byte t;
if (inputType == INPUT_TYPE.BYTE1) {
t = 0;
} else if (inputType == INPUT_TYPE.BYTE2) {
t = 1;
} else {
// Any remaining input type. NOTE(review): presumably BYTE4 -- confirm against INPUT_TYPE.
t = 2;
}
metaOut.writeByte(t);
metaOut.writeVLong(startNode);
metaOut.writeVLong(numBytes);
}
}
}

0 comments on commit 49b622f

Please sign in to comment.