Skip to content

Commit

Permalink
Use .si file's Lucene version for files without version info
Browse files Browse the repository at this point in the history
Signed-off-by: Bhumika Saini <sabhumik@amazon.com>
  • Loading branch information
Bhumika Saini committed Jul 21, 2023
1 parent e1ce4e6 commit 0e1adb4
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@

package org.opensearch.index.remote;

import org.apache.lucene.index.CorruptIndexException;

import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Utils for remote store
Expand Down Expand Up @@ -54,23 +50,23 @@ public static long invertLong(String str) {
}

/**
* Extracts the Lucene major version from the provided DocValuesUpdates file name
* @param filename DocValuesUpdates file name to parse
* @return Lucene major version that wrote the DocValuesUpdates file
* @throws CorruptIndexException If the Lucene major version cannot be inferred
* Extracts the segment name from the provided segment file name
* @param filename Segment file name to parse
* @return Name of the segment that the segment file belongs to
*/
public static int getLuceneVersionForDocValuesUpdates(String filename) throws CorruptIndexException {
// TODO: The following regex could work incorrectly if both major and minor versions are double-digits.
// This is because the major and minor versions do not have a separator in the filename currently
// (Lucence<major><minor>).
// We may need to revisit this if the filename pattern is updated in future Lucene versions.
Pattern docValuesUpdatesFileNamePattern = Pattern.compile("_\\d+_\\d+_Lucene(\\d+)\\d+_\\d+");
public static String getSegmentName(String filename) {
// Segment file names follow patterns like "_0.cfe" or "_0_1_Lucene90_0.dvm".
// Here, the segment name is "_0", which is the set of characters
// starting with "_" until the next "_" or first ".".
int endIdx = filename.indexOf('_', 1);
if (endIdx == -1) {
endIdx = filename.indexOf('.');
}

Matcher matcher = docValuesUpdatesFileNamePattern.matcher(filename);
if (matcher.find()) {
return Integer.parseInt(matcher.group(1));
} else {
throw new CorruptIndexException("Unable to infer Lucene version for segment file " + filename, filename);
if (endIdx == -1) {
throw new IllegalArgumentException("Unable to infer segment name for segment file " + filename);
}

return filename.substring(0, endIdx);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
Expand Down Expand Up @@ -624,22 +623,12 @@ public void uploadMetadata(
);
try {
try (IndexOutput indexOutput = storeDirectory.createOutput(metadataFilename, IOContext.DEFAULT)) {
Map<String, Integer> segmentToLuceneVersion = getSegmentToLuceneVersion(segmentInfosSnapshot);
Map<String, Integer> segmentToLuceneVersion = getSegmentToLuceneVersion(segmentFiles, segmentInfosSnapshot);
Map<String, String> uploadedSegments = new HashMap<>();
for (String file : segmentFiles) {
if (segmentsUploadedToRemoteStore.containsKey(file)) {
UploadedSegmentMetadata metadata = segmentsUploadedToRemoteStore.get(file);
if (segmentToLuceneVersion.containsKey(metadata.originalFilename)) {
metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename));
} else if (metadata.originalFilename.equals(segmentInfosSnapshot.getSegmentsFileName())) {
metadata.setWrittenByMajor(segmentInfosSnapshot.getCommitLuceneVersion().major);
} else {
throw new CorruptIndexException(
"Lucene version is missing for segment file " + metadata.originalFilename,
metadata.originalFilename
);
}

metadata.setWrittenByMajor(segmentToLuceneVersion.get(metadata.originalFilename));
uploadedSegments.put(file, metadata.toString());
} else {
throw new NoSuchFileException(file);
Expand Down Expand Up @@ -671,34 +660,30 @@ public void uploadMetadata(
}

/**
* Parses the provided SegmenttInfos to retrieve a mapping of segment files to the respective Lucene major version that wrote the segments
* @param segmentInfosSnapshot SegmenttInfos instance to parse
* @return Map of segment file name to the Lucene major version that wrote it
* @throws CorruptIndexException If the Lucene major version cannot be parsed for a segment file
* Parses the provided SegmentInfos to retrieve a mapping of the provided segment files to
* the respective Lucene major version that wrote the segments
* @param segmentFiles List of segment files for which the Lucene major version is needed
* @param segmentInfosSnapshot SegmentInfos instance to parse
* @return Map of the segment file to its Lucene major version
*/
private Map<String, Integer> getSegmentToLuceneVersion(SegmentInfos segmentInfosSnapshot) throws CorruptIndexException {
private Map<String, Integer> getSegmentToLuceneVersion(Collection<String> segmentFiles, SegmentInfos segmentInfosSnapshot) {
Map<String, Integer> segmentToLuceneVersion = new HashMap<>();
for (SegmentCommitInfo segmentCommitInfo : segmentInfosSnapshot) {
SegmentInfo info = segmentCommitInfo.info;
Set<String> segFiles = info.files();
for (String file : segFiles) {
segmentToLuceneVersion.put(file, info.getVersion().major);
}
}

int docValuesUpdatesLuceneMajorVersion = -1;
Map<Integer, Set<String>> docValuesUpdatesFiles = segmentCommitInfo.getDocValuesUpdatesFiles();
for (int key : docValuesUpdatesFiles.keySet()) {
for (String file : docValuesUpdatesFiles.get(key)) {
if (docValuesUpdatesLuceneMajorVersion == -1) {
docValuesUpdatesLuceneMajorVersion = RemoteStoreUtils.getLuceneVersionForDocValuesUpdates(file);
}

segmentToLuceneVersion.put(file, docValuesUpdatesLuceneMajorVersion);
}

Set<String> fieldInfosFiles = segmentCommitInfo.getFieldInfosFiles();
for (String file : fieldInfosFiles) {
segmentToLuceneVersion.put(file, docValuesUpdatesLuceneMajorVersion);
for (String file : segmentFiles) {
if (segmentToLuceneVersion.containsKey(file) == false) {
if (file.equals(segmentInfosSnapshot.getSegmentsFileName())) {
segmentToLuceneVersion.put(file, segmentInfosSnapshot.getCommitLuceneVersion().major);
} else {
// Fallback to the Lucene major version of the respective segment's .si file
String segmentInfoFileName = RemoteStoreUtils.getSegmentName(file) + ".si";
segmentToLuceneVersion.put(file, segmentToLuceneVersion.get(segmentInfoFileName));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

package org.opensearch.index.remote;

import org.apache.lucene.index.CorruptIndexException;
import org.opensearch.test.OpenSearchTestCase;

public class RemoteStoreUtilsTests extends OpenSearchTestCase {
Expand Down Expand Up @@ -40,11 +39,25 @@ public void testinvert() {
}
}

public void testGetLuceneVersionForDocValuesUpdates() throws CorruptIndexException {
assertEquals(9, RemoteStoreUtils.getLuceneVersionForDocValuesUpdates("_0_1_Lucene90_0.dvm"));
public void testGetSegmentNameForCfeFile() {
assertEquals("_foo", RemoteStoreUtils.getSegmentName("_foo.cfe"));
}

public void testGetLuceneVersionForDocValuesUpdatesException() {
assertThrows(CorruptIndexException.class, () -> RemoteStoreUtils.getLuceneVersionForDocValuesUpdates("_0_1_Asserting_0.dvm"));
public void testGetSegmentNameForDvmFile() {
assertEquals("_bar", RemoteStoreUtils.getSegmentName("_bar_1_Lucene90_0.dvm"));
}

public void testGetSegmentNameWeirdSegmentNameOnlyUnderscore() {
// Validate behaviour when segment name contains delimiters only
assertEquals("_", RemoteStoreUtils.getSegmentName("_.dvm"));
}

public void testGetSegmentNameUnderscoreDelimiterOverrides() {
// Validate behaviour when segment name contains delimiters only
assertEquals("_", RemoteStoreUtils.getSegmentName("___.dvm"));
}

public void testGetSegmentNameException() {
assertThrows(IllegalArgumentException.class, () -> RemoteStoreUtils.getSegmentName("dvd"));
}
}

0 comments on commit 0e1adb4

Please sign in to comment.