Use blob store cache for Lucene compound files (#69861) (#69990)
The blob store cache stores a variable-length prefix of each
Lucene file in the .snapshot-blob-cache system index. This
speeds up the opening of Lucene directories during shard
recovery and limits the number of bytes downloaded from the
blob store when a searchable snapshot shard must be rebuilt.
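
As a rough sketch of the idea, caching such a prefix boils down
to computing a byte range that starts at zero and is capped at a
configured maximum length. The class, method, and constant names
below are made up for illustration; this is not the actual
Elasticsearch API:

    // Hypothetical sketch of prefix caching: cache the whole file when it is
    // small, otherwise only its first MAX_CACHED_PREFIX bytes.
    final class BlobCachePrefixSketch {

        static final long MAX_CACHED_PREFIX = 4096L; // assumed cap, not the real setting

        // Returns the half-open byte range [0, end) to store in the cache index.
        static long[] prefixByteRange(long fileLength) {
            return new long[] { 0L, Math.min(fileLength, MAX_CACHED_PREFIX) };
        }
    }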

This commit adds support for compound segment files (.cfs)
when they are partially cached (i.e., Storage.SHARED_CACHE),
so that the inner files they are composed of can also be
cached in the blob store cache index.
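
A self-contained sketch of what this enables, again with
hypothetical names (the actual change threads a header byte
range through IndexInput slices): when a slice of a .cfs file
corresponds to an inner Lucene file, that slice gets its own
prefix range to cache, selected by file extension.

    import java.util.Set;

    // Illustrative only: pick a cacheable prefix range for a slice of a
    // compound (.cfs) file based on the inner file's extension. The extension
    // list, the 4 KB cap, and all names are assumptions of this sketch.
    final class CompoundFileSliceSketch {

        // Inner-file extensions worth caching (illustrative subset).
        static final Set<String> CACHEABLE_EXTENSIONS = Set.of(".fdt", ".fdx", ".tim", ".tip", ".nvd");

        static final long MAX_CACHED_PREFIX = 4096L;

        // Returns the half-open range {start, end} to cache for the slice, or
        // null when the slice name has no extension we know how to cache.
        static long[] slicePrefixByteRange(String sliceName, long sliceLength) {
            final int dot = sliceName.lastIndexOf('.');
            if (dot < 0 || CACHEABLE_EXTENSIONS.contains(sliceName.substring(dot)) == false) {
                return null; // not an inner Lucene file worth caching
            }
            return new long[] { 0L, Math.min(sliceLength, MAX_CACHED_PREFIX) };
        }
    }

This is also why the tests below now name slices with realistic
Lucene file extensions (see randomFileExtension()) rather than
free-form descriptions.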

Co-Authored-By: Yannick Welsch <yannick@welsch.lu>

Backport of #69861 for 7.x
tlrx authored Mar 4, 2021
1 parent 8e1a615 commit 5545033
Showing 10 changed files with 291 additions and 269 deletions.
@@ -72,7 +72,7 @@ protected byte[] randomReadAndSlice(IndexInput indexInput, int length) throws IOException
case 3:
// Read using slice
len = randomIntBetween(1, length - readPos);
IndexInput slice = indexInput.slice("slice (" + readPos + ", " + len + ") of " + indexInput, readPos, len);
IndexInput slice = indexInput.slice(randomAlphaOfLength(10) + randomFileExtension(), readPos, len);
temp = randomReadAndSlice(slice, len);
// assert that position in the original input didn't change
assertEquals(readPos, indexInput.getFilePointer());
@@ -121,7 +121,7 @@ protected void doRun() throws Exception {
clone = indexInput.clone();
} else {
final int sliceEnd = between(readEnd, length);
clone = indexInput.slice("concurrent slice (0, " + sliceEnd + ") of " + indexInput, 0L, sliceEnd);
clone = indexInput.slice("slice" + randomAlphaOfLength(10) + randomFileExtension(), 0L, sliceEnd);
}
startLatch.countDown();
startLatch.await();
@@ -178,4 +178,34 @@ public void onRejection(Exception e) {
return output;
}

protected static String randomFileExtension() {
return randomFrom(
".cfe",
".cfs",
".dii",
".dim",
".doc",
".dvd",
".dvm",
".fdt",
".fdx",
".fdm",
".fnm",
".kdd",
".kdi",
".kdm",
".liv",
".nvd",
".nvm",
".pay",
".pos",
".tim",
".tip",
".tmd",
".tvd",
".tvx",
".vec",
".vem"
);
}
}
@@ -9,7 +9,6 @@

import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeResponse;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.client.Client;
@@ -22,7 +21,6 @@
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
@@ -33,10 +31,8 @@
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.shard.IndexingStats;
import org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot;
import org.elasticsearch.plugins.ClusterPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
import org.elasticsearch.snapshots.SnapshotId;
import org.elasticsearch.snapshots.SnapshotsService;
import org.elasticsearch.test.InternalTestCluster;
@@ -49,32 +45,26 @@
import org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsConstants;
import org.elasticsearch.xpack.searchablesnapshots.action.SearchableSnapshotsStatsAction;
import org.elasticsearch.xpack.searchablesnapshots.action.SearchableSnapshotsStatsRequest;
import org.elasticsearch.xpack.searchablesnapshots.cache.ByteRange;
import org.elasticsearch.xpack.searchablesnapshots.cache.CacheService;
import org.elasticsearch.xpack.searchablesnapshots.cache.FrozenCacheService;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static org.elasticsearch.index.mapper.MapperService.SINGLE_MAPPING_NAME;
import static org.elasticsearch.repositories.blobstore.BlobStoreRepository.INDEX_SHARD_SNAPSHOT_FORMAT;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshots.DATA_TIERS_CACHE_INDEX_PREFERENCE;
import static org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsConstants.SNAPSHOT_BLOB_CACHE_INDEX;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;

public class SearchableSnapshotsBlobStoreCacheIntegTests extends BaseSearchableSnapshotsIntegTestCase {

@@ -117,6 +107,11 @@ protected int numberOfReplicas() {
return 0;
}

@Override
protected int numberOfShards() {
return 1;
}

@Override
protected Settings nodeSettings(int nodeOrdinal) {
return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(cacheSettings).build();
@@ -156,17 +151,13 @@ public void testBlobStoreCache() throws Exception {
final SnapshotId snapshot = createSnapshot(repositoryName, "test-snapshot", Collections.singletonList(indexName)).snapshotId();
assertAcked(client().admin().indices().prepareDelete(indexName));

// extract the list of blobs per shard from the snapshot directory on disk
final Map<String, BlobStoreIndexShardSnapshot> blobsInSnapshot = blobsInSnapshot(repositoryLocation, snapshot.getUUID());
assertThat("Failed to load all shard snapshot metadata files", blobsInSnapshot.size(), equalTo(numberOfShards.numPrimaries));

expectThrows(
IndexNotFoundException.class,
".snapshot-blob-cache system index should not be created yet",
() -> systemClient().admin().indices().prepareGetIndex().addIndices(SNAPSHOT_BLOB_CACHE_INDEX).get()
);

Storage storage = randomFrom(Storage.values());
final Storage storage = randomFrom(Storage.values());
logger.info(
"--> mount snapshot [{}] as an index for the first time [storage={}, max length={}]",
snapshot,
@@ -211,7 +202,8 @@ public void testBlobStoreCache() throws Exception {

logger.info("--> verifying cached documents in system index [{}]", SNAPSHOT_BLOB_CACHE_INDEX);
if (numberOfDocs > 0) {
assertCachedBlobsInSystemIndex(repositoryName, blobsInSnapshot);
ensureYellow(SNAPSHOT_BLOB_CACHE_INDEX);
refreshSystemIndex();

logger.info("--> verifying system index [{}] data tiers preference", SNAPSHOT_BLOB_CACHE_INDEX);
assertThat(
@@ -244,7 +236,14 @@ public void testBlobStoreCache() throws Exception {
assertHitCount(client().prepareSearch(restoredIndex).setSize(0).setTrackTotalHits(true).get(), numberOfDocs);
assertAcked(client().admin().indices().prepareDelete(restoredIndex));

storage = randomFrom(Storage.values());
assertBusy(() -> {
refreshSystemIndex();
assertThat(
systemClient().prepareSearch(SNAPSHOT_BLOB_CACHE_INDEX).setSize(0).get().getHits().getTotalHits().value,
greaterThan(0L)
);
});

logger.info("--> mount snapshot [{}] as an index for the second time [storage={}]", snapshot, storage);
final String restoredAgainIndex = randomBoolean() ? indexName : randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
mountSnapshot(
@@ -261,22 +260,19 @@ public void testBlobStoreCache() throws Exception {
);
ensureGreen(restoredAgainIndex);

logger.info("--> verifying cached documents (after second mount) in system index [{}]", SNAPSHOT_BLOB_CACHE_INDEX);
if (numberOfDocs > 0) {
assertCachedBlobsInSystemIndex(repositoryName, blobsInSnapshot);
}

logger.info("--> verifying shards of [{}] were started without using the blob store more than necessary", restoredAgainIndex);
for (final SearchableSnapshotShardStats shardStats : client().execute(
SearchableSnapshotsStatsAction.INSTANCE,
new SearchableSnapshotsStatsRequest()
).actionGet().getStats()) {
for (final SearchableSnapshotShardStats.CacheIndexInputStats indexInputStats : shardStats.getStats()) {
// we read the header of each file contained within the .cfs file, which could be anywhere
final boolean mayReadMoreThanHeader = indexInputStats.getFileExt().equals("cfs");
if (mayReadMoreThanHeader == false) {
assertThat(Strings.toString(indexInputStats), indexInputStats.getBlobStoreBytesRequested().getCount(), equalTo(0L));
}
assertThat(
Strings.toString(indexInputStats),
indexInputStats.getBlobStoreBytesRequested().getCount(),
storage == Storage.SHARED_CACHE ? equalTo(0L)
: indexInputStats.getFileExt().equals("cfs") ? greaterThanOrEqualTo(0L)
: equalTo(0L)
);
}
}

@@ -314,22 +310,19 @@ public Settings onNodeStopped(String nodeName) throws Exception {
});
ensureGreen(restoredAgainIndex);

logger.info("--> verifying cached documents (after restart) in system index [{}]", SNAPSHOT_BLOB_CACHE_INDEX);
if (numberOfDocs > 0) {
assertCachedBlobsInSystemIndex(repositoryName, blobsInSnapshot);
}

logger.info("--> shards of [{}] should start without downloading bytes from the blob store", restoredAgainIndex);
for (final SearchableSnapshotShardStats shardStats : client().execute(
SearchableSnapshotsStatsAction.INSTANCE,
new SearchableSnapshotsStatsRequest()
).actionGet().getStats()) {
for (final SearchableSnapshotShardStats.CacheIndexInputStats indexInputStats : shardStats.getStats()) {
// we read the header of each file contained within the .cfs file, which could be anywhere
final boolean mayReadMoreThanHeader = indexInputStats.getFileExt().equals("cfs");
if (mayReadMoreThanHeader == false) {
assertThat(Strings.toString(indexInputStats), indexInputStats.getBlobStoreBytesRequested().getCount(), equalTo(0L));
}
assertThat(
Strings.toString(indexInputStats),
indexInputStats.getBlobStoreBytesRequested().getCount(),
storage == Storage.SHARED_CACHE ? equalTo(0L)
: indexInputStats.getFileExt().equals("cfs") ? greaterThanOrEqualTo(0L)
: equalTo(0L)
);
}
}

@@ -373,61 +366,6 @@ private void refreshSystemIndex() {
}
}

/**
* Reads a repository location on disk and extracts the list of blobs for each shards
*/
private Map<String, BlobStoreIndexShardSnapshot> blobsInSnapshot(Path repositoryLocation, String snapshotId) throws IOException {
final Map<String, BlobStoreIndexShardSnapshot> blobsPerShard = new HashMap<>();
forEachFileRecursively(repositoryLocation.resolve("indices"), ((file, basicFileAttributes) -> {
final String fileName = file.getFileName().toString();
if (fileName.equals(BlobStoreRepository.SNAPSHOT_FORMAT.blobName(snapshotId))) {
blobsPerShard.put(
String.join(
"/",
snapshotId,
file.getParent().getParent().getFileName().toString(),
file.getParent().getFileName().toString()
),
INDEX_SHARD_SNAPSHOT_FORMAT.deserialize(fileName, xContentRegistry(), Streams.readFully(Files.newInputStream(file)))
);
}
}));
return Collections.unmodifiableMap(blobsPerShard);
}

private void assertCachedBlobsInSystemIndex(final String repositoryName, final Map<String, BlobStoreIndexShardSnapshot> blobsInSnapshot)
throws Exception {
final BlobStoreCacheService blobCacheService = internalCluster().getDataNodeInstance(BlobStoreCacheService.class);
assertBusy(() -> {
refreshSystemIndex();

long numberOfCachedBlobs = 0L;
for (Map.Entry<String, BlobStoreIndexShardSnapshot> blob : blobsInSnapshot.entrySet()) {
for (BlobStoreIndexShardSnapshot.FileInfo fileInfo : blob.getValue().indexFiles()) {
if (fileInfo.name().startsWith("__") == false) {
continue;
}

final String fileName = fileInfo.physicalName();
final long length = fileInfo.length();
final ByteRange expectedByteRange = blobCacheService.computeBlobCacheByteRange(fileName, length, blobCacheMaxLength);
final String path = String.join("/", repositoryName, blob.getKey(), fileName, "@" + expectedByteRange.start());

final GetResponse getResponse = systemClient().prepareGet(SNAPSHOT_BLOB_CACHE_INDEX, SINGLE_MAPPING_NAME, path).get();
assertThat("Expected cached blob [" + path + "] for blob [" + fileInfo + "]", getResponse.isExists(), is(true));
final CachedBlob cachedBlob = CachedBlob.fromSource(getResponse.getSourceAsMap());
assertThat(cachedBlob.from(), equalTo(expectedByteRange.start()));
assertThat(cachedBlob.to(), equalTo(expectedByteRange.end()));
assertThat((long) cachedBlob.length(), equalTo(expectedByteRange.length()));
numberOfCachedBlobs += 1;
}
}

refreshSystemIndex();
assertHitCount(systemClient().prepareSearch(SNAPSHOT_BLOB_CACHE_INDEX).setSize(0).get(), numberOfCachedBlobs);
});
}

/**
* This plugin declares an {@link AllocationDecider} that forces searchable snapshot shards to be allocated after
* the primary shards of the snapshot blob cache index are started. This way we can ensure that searchable snapshot
@@ -30,6 +30,7 @@
public abstract class BaseSearchableSnapshotIndexInput extends BufferedIndexInput {

protected final Logger logger;
protected final String name;
protected final SearchableSnapshotDirectory directory;
protected final BlobContainer blobContainer;
protected final FileInfo fileInfo;
@@ -38,16 +39,18 @@ public abstract class BaseSearchableSnapshotIndexInput
protected final long offset;
protected final long length;

/** Range of bytes that should be cached in the blob cache for the current index input **/
protected final ByteRange blobCacheByteRange;
/**
* Range of bytes that should be cached in the blob cache for the current index input's header.
*/
protected final ByteRange headerBlobCacheByteRange;

// the following are only mutable so they can be adjusted after cloning/slicing
protected volatile boolean isClone;
private AtomicBoolean closed;

public BaseSearchableSnapshotIndexInput(
Logger logger,
String resourceDesc,
String name,
SearchableSnapshotDirectory directory,
FileInfo fileInfo,
IOContext context,
@@ -56,15 +59,16 @@ public BaseSearchableSnapshotIndexInput(
long length,
ByteRange blobCacheByteRange
) {
super(resourceDesc, context);
super(name, context);
this.name = Objects.requireNonNull(name);
this.logger = Objects.requireNonNull(logger);
this.directory = Objects.requireNonNull(directory);
this.blobContainer = Objects.requireNonNull(directory.blobContainer());
this.fileInfo = Objects.requireNonNull(fileInfo);
this.context = Objects.requireNonNull(context);
assert fileInfo.metadata().hashEqualsContents() == false
: "this method should only be used with blobs that are NOT stored in metadata's hash field " + "(fileInfo: " + fileInfo + ')';
this.blobCacheByteRange = Objects.requireNonNull(blobCacheByteRange);
this.headerBlobCacheByteRange = Objects.requireNonNull(blobCacheByteRange);
this.stats = Objects.requireNonNull(stats);
this.offset = offset;
this.length = length;
@@ -77,6 +81,13 @@ public final long length() {
return length;
}

protected long getAbsolutePosition() {
final long position = getFilePointer() + this.offset;
assert position >= 0L : "absolute position is negative: " + position;
assert position <= fileInfo.length() : position + " vs " + fileInfo.length();
return position;
}

@Override
protected final void readInternal(ByteBuffer b) throws IOException {
assert assertCurrentThreadIsNotCacheFetchAsync();
@@ -107,7 +118,7 @@ private boolean maybeReadChecksumFromFileInfo(ByteBuffer b) throws IOException {
if (remaining > CodecUtil.footerLength()) {
return false;
}
final long position = getFilePointer() + this.offset;
final long position = getAbsolutePosition();
final long checksumPosition = fileInfo.length() - CodecUtil.footerLength();
if (position < checksumPosition) {
return false;
@@ -132,6 +143,13 @@ private boolean maybeReadChecksumFromFileInfo(ByteBuffer b) throws IOException {
return success;
}

protected ByteRange maybeReadFromBlobCache(long position, int length) {
if (headerBlobCacheByteRange.contains(position, position + length)) {
return headerBlobCacheByteRange;
}
return ByteRange.EMPTY;
}

/**
* Opens an {@link InputStream} for the given range of bytes which reads the data directly from the blob store. If the requested range
* spans multiple blobs then this stream will request them in turn.

(diff truncated: remaining changed files not shown)
