
[Draft] Improve usage of blob store cache during searchable snapshots shard recovery #69283

Closed · wants to merge 2 commits
@@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.apache.lucene.codecs.lucene50;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompoundDirectory;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.elasticsearch.common.collect.Tuple;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class CompoundReaderUtils {

    private CompoundReaderUtils() {}

    public static Map<String, Map<String, Tuple<Long, Long>>> extractCompoundFiles(Directory directory) throws IOException {
Member Author commented:
I'm really sorry but I did not find any better way to extract the list of files that compose the CFS. Reading only the .cfe is possible, but that won't give the inner offsets (only the lengths), and I think it is better to check the right boundaries.

        final Map<String, Map<String, Tuple<Long, Long>>> compoundFiles = new HashMap<>();
        final SegmentInfos segmentInfos = SegmentInfos.readLatestCommit(directory);
        for (SegmentCommitInfo segmentCommitInfo : segmentInfos) {
            final SegmentInfo segmentInfo = segmentCommitInfo.info;
            if (segmentInfo.getUseCompoundFile()) {
                final Codec codec = segmentInfo.getCodec();
                try (CompoundDirectory compoundDir = codec.compoundFormat().getCompoundReader(directory, segmentInfo, IOContext.DEFAULT)) {
                    String className = compoundDir.getClass().getName();
                    switch (className) {
                        case "org.apache.lucene.codecs.lucene50.Lucene50CompoundReader":
                            compoundFiles.put(segmentInfo.name, readEntries(directory, segmentCommitInfo.info));
                            break;
                        default:
                            assert false : "please implement readEntries() for this format of compound files: " + className;
                            throw new IllegalStateException("This format of compound files is not supported: " + className);
                    }
                }
            }
        }
        return Collections.unmodifiableMap(compoundFiles);
    }

    private static Map<String, Tuple<Long, Long>> readEntries(Directory directory, SegmentInfo segmentInfo) throws IOException {
        final String entriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene50CompoundFormat.ENTRIES_EXTENSION);
        try (ChecksumIndexInput entriesStream = directory.openChecksumInput(entriesFileName, IOContext.READONCE)) {
            Map<String, Tuple<Long, Long>> mapping = new HashMap<>();
            Throwable throwable = null;
            try {
                CodecUtil.checkIndexHeader(
                    entriesStream,
                    Lucene50CompoundFormat.ENTRY_CODEC,
                    Lucene50CompoundFormat.VERSION_START,
                    Lucene50CompoundFormat.VERSION_CURRENT,
                    segmentInfo.getId(),
                    ""
                );

                final int numEntries = entriesStream.readVInt();
                final Set<String> seen = new HashSet<>(numEntries);
                for (int i = 0; i < numEntries; i++) {
                    final String id = entriesStream.readString();
                    if (seen.add(id) == false) {
                        throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
                    }
                    long offset = entriesStream.readLong();
                    long length = entriesStream.readLong();
                    mapping.put(id, Tuple.tuple(offset, length));
                }
                assert mapping.size() == numEntries;
            } catch (Throwable exception) {
                throwable = exception;
            } finally {
                // checkFooter verifies the checksum and rethrows the captured exception, if any
                CodecUtil.checkFooter(entriesStream, throwable);
            }
            return Collections.unmodifiableMap(mapping);
        }
    }
}
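
For reference, a minimal sketch of how this helper might be exercised against a directory containing a committed Lucene index. The index path, example class, and the printing are illustrative assumptions, not part of this change:

import org.apache.lucene.codecs.lucene50.CompoundReaderUtils;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.elasticsearch.common.collect.Tuple;

import java.nio.file.Paths;
import java.util.Map;

public class CompoundReaderUtilsExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; any directory with a Lucene commit point works.
        try (Directory directory = FSDirectory.open(Paths.get("/tmp/index"))) {
            // Maps each compound (CFS) segment name to its inner files, each file
            // associated with its (offset, length) tuple inside the .cfs container.
            Map<String, Map<String, Tuple<Long, Long>>> cfs = CompoundReaderUtils.extractCompoundFiles(directory);
            cfs.forEach((segment, entries) -> entries.forEach(
                (suffix, range) -> System.out.println(segment + suffix + " offset=" + range.v1() + " length=" + range.v2())
            ));
        }
    }
}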

[A large file diff in this PR is not rendered here.]

@@ -26,6 +26,7 @@
import org.elasticsearch.snapshots.SnapshotsService;
import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotAction;
import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotRequest;
import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotRequest.Storage;
import org.elasticsearch.xpack.searchablesnapshots.cache.CacheService;
import org.elasticsearch.xpack.searchablesnapshots.cache.FrozenCacheService;

@@ -116,6 +117,17 @@ protected void mountSnapshot(
        String indexName,
        String restoredIndexName,
        Settings restoredIndexSettings
    ) throws Exception {
        mountSnapshot(repositoryName, snapshotName, indexName, restoredIndexName, restoredIndexSettings, Storage.FULL_COPY);
    }

    protected void mountSnapshot(
        String repositoryName,
        String snapshotName,
        String indexName,
        String restoredIndexName,
        Settings restoredIndexSettings,
        final Storage storage
    ) throws Exception {
        final MountSearchableSnapshotRequest mountRequest = new MountSearchableSnapshotRequest(
            restoredIndexName,
@@ -128,7 +140,7 @@ protected void mountSnapshot(
.build(),
Strings.EMPTY_ARRAY,
true,
MountSearchableSnapshotRequest.Storage.FULL_COPY
storage
);

final RestoreSnapshotResponse restoreResponse = client().execute(MountSearchableSnapshotAction.INSTANCE, mountRequest).get();
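The new overload lets a test pick the storage type explicitly, while the original signature keeps its FULL_COPY behaviour. A sketch of a call mounting a snapshot as a shared-cache index, assuming the Storage.SHARED_CACHE constant of MountSearchableSnapshotRequest.Storage; repository, snapshot, and index names are placeholders:

// Inside a test class inheriting the helper above:
mountSnapshot(
    "my-repository",     // hypothetical repository name
    "my-snapshot",       // hypothetical snapshot name
    "my-index",          // index captured in the snapshot
    "my-mounted-index",  // name for the mounted searchable snapshot index
    Settings.EMPTY,
    Storage.SHARED_CACHE // new parameter, instead of the implicit FULL_COPY
);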
@@ -10,6 +10,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.IndexFileNames;
import org.elasticsearch.ElasticsearchTimeoutException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
@@ -24,14 +25,16 @@
import org.elasticsearch.client.OriginSettingClient;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.ConnectTransportException;
import org.elasticsearch.xpack.searchablesnapshots.cache.ByteRange;

import java.time.Instant;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@@ -41,8 +44,6 @@ public class BlobStoreCacheService {

    private static final Logger logger = LogManager.getLogger(BlobStoreCacheService.class);

    public static final int DEFAULT_CACHED_BLOB_SIZE = ByteSizeUnit.KB.toIntBytes(4);

    private final ThreadPool threadPool;
    private final Client client;
    private final String index;
@@ -155,4 +156,70 @@ public void onFailure(Exception e) {
            listener.onFailure(e);
        }
    }

    private static final Set<String> METADATA_FILES_EXTENSIONS = Set.of(
        "cfe", // compound file's entry table
        "dvm", // doc values metadata file
        "fdm", // stored fields metadata file
        "fnm", // field names metadata file
        "kdm", // Lucene 8.6 point format metadata file
        "nvm", // norms metadata file
        "tmd", // Lucene 8.6 terms metadata file
        "tvm", // terms vectors metadata file
        "vem" // Lucene 9.0 indexed vectors metadata
    );

    private static final Set<String> NON_METADATA_FILES_EXTENSIONS = Set.of(
        "cfs",
        "dii",
        "dim",
        "doc",
        "dvd",
        "fdt",
        "fdx",
        "kdd",
        "kdi",
        "liv",
        "nvd",
        "pay",
        "pos",
        "tim",
        "tip",
        "tvd",
        "tvx",
        "vec"
    );

    /**
     * Computes the {@link ByteRange} corresponding to the header of a Lucene file. This range can vary depending on the type of the
     * file, which is indicated by the file's extension. The returned byte range can never be larger than the file's length, but it can
     * be smaller.
     *
     * @param fileName the name of the file
     * @param fileLength the length of the file
     * @return the {@link ByteRange} of the file's header to cache
     */
    public static ByteRange computeHeaderByteRange(String fileName, long fileLength) {
        assert Sets.intersection(METADATA_FILES_EXTENSIONS, NON_METADATA_FILES_EXTENSIONS).isEmpty();
        final String fileExtension = IndexFileNames.getExtension(fileName);
        if (METADATA_FILES_EXTENSIONS.contains(fileExtension)) {
            return upTo64kb(fileLength);
        } else {
            if (NON_METADATA_FILES_EXTENSIONS.contains(fileExtension) == false) {
                // TODO maybe log less?
                logger.warn("Blob store cache failed to detect Lucene file extension [{}], using default cache file size", fileExtension);
            }
            return upTo1kb(fileLength);
        }
    }

    private static ByteRange upTo64kb(long fileLength) {
        if (fileLength > 65536L) {
            return upTo1kb(fileLength);
        }
        return ByteRange.of(0L, fileLength);
    }

    private static ByteRange upTo1kb(long fileLength) {
        return ByteRange.of(0L, Math.min(fileLength, 1024L));
    }
}
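
A small sketch of the expected results given the implementation above (metadata extensions are cached up to 64kb, everything else up to 1kb, and a range never exceeds the file length); the segment file names are made up:

// .fnm is a metadata extension and the file fits under 64kb: cache the whole file.
ByteRange r1 = BlobStoreCacheService.computeHeaderByteRange("_0.fnm", 10_000L); // [0, 10000)

// .fdt is a non-metadata extension: cache only the first 1kb.
ByteRange r2 = BlobStoreCacheService.computeHeaderByteRange("_0.fdt", 10_000L); // [0, 1024)

// A metadata file larger than 64kb falls back to the 1kb header range.
ByteRange r3 = BlobStoreCacheService.computeHeaderByteRange("_0.fnm", 1_000_000L); // [0, 1024)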
@@ -121,6 +121,10 @@ public BytesReference bytes() {
        return bytes;
    }

    public Version version() {
        return version;
    }

    public static String generateId(String repository, String name, String path, long offset) {
        return String.join("/", repository, path, name, "@" + offset);
    }
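
For reference, the id is just the slash-joined components plus the offset marker; a hypothetical example:

String id = CachedBlob.generateId("repo", "_0.cfs", "snapshots/abc/indices/0", 0L);
// -> "repo/snapshots/abc/indices/0/_0.cfs/@0"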
@@ -187,4 +191,27 @@ public static CachedBlob fromSource(final Map<String, Object> source) {
            to.longValue()
        );
    }

    @Override
    public String toString() {
        return "CachedBlob ["
            + "creationTime="
            + creationTime
            + ", version="
            + version
            + ", repository='"
            + repository
            + '\''
            + ", name='"
            + name
            + '\''
            + ", path='"
            + path
            + '\''
            + ", from="
            + from
            + ", to="
            + to
            + ']';
    }
}