Skip to content

Commit

Permalink
C486. Introduce cache index for searchable snapshots (#60522)
Browse files Browse the repository at this point in the history
If a searchable snapshot shard fails (e.g. its node leaves the cluster)
we want to be able to start it up again on a different node as quickly
as possible to avoid unnecessarily blocking or failing searches. It
isn't feasible to fully restore such shards in an acceptably short time.
In particular we would like to be able to deal with the `can_match`
phase of a search ASAP so that we can skip unnecessary waiting on shards
that may still be warming up but which are not required for the search.

This commit solves this problem by introducing a system index that holds
much of the data required to start a shard. Today(*) this means it holds
the contents of every file with size <8kB, and the first 4kB of every
other file in the shard. This system index acts as a second-level cache,
behind the first-level node-local disk cache but in front of the blob
store itself. Reading chunks from the index is slower than reading them
directly from disk, but faster than reading them from the blob store,
and is also replicated and accessible to all nodes in the cluster.

(*) the exact heuristics for what we should put into the system index
are still under investigation and may change in future.

This second-level cache is populated when we attempt to read a chunk
which is missing from both levels of cache and must therefore be read
from the blob store.

We also introduce `SearchableSnapshotsBlobStoreCacheIntegTests`, which
verifies that we do not hit the blob store more than necessary when
starting up a shard that we've seen before, whether due to a node
restart or because a snapshot was mounted multiple times.

Co-authored-by: David Turner <david.turner@elastic.co>
  • Loading branch information
tlrx and DaveCTurner authored Aug 26, 2020
1 parent b440d59 commit a20ff51
Show file tree
Hide file tree
Showing 29 changed files with 1,911 additions and 279 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,7 @@ protected static boolean isXPackTemplate(String name) {
case "metrics":
case "metrics-settings":
case "metrics-mappings":
case ".snapshot-blob-cache":
return true;
default:
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public static Map<String, String> filterSecurityHeaders(Map<String, String> head
public static final String ASYNC_SEARCH_ORIGIN = "async_search";
public static final String IDP_ORIGIN = "idp";
public static final String STACK_ORIGIN = "stack";
public static final String SEARCHABLE_SNAPSHOTS_ORIGIN = "searchable_snapshots";

private ClientHelper() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
package org.elasticsearch.xpack.core.searchablesnapshots;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
Expand Down Expand Up @@ -134,16 +135,20 @@ public static class CacheIndexInputStats implements Writeable, ToXContentObject
private final Counter contiguousReads;
private final Counter nonContiguousReads;
private final Counter cachedBytesRead;
private final Counter indexCacheBytesRead;
private final TimedCounter cachedBytesWritten;
private final TimedCounter directBytesRead;
private final TimedCounter optimizedBytesRead;
private final Counter blobStoreBytesRequested;
private final long currentIndexCacheFills;

public CacheIndexInputStats(String fileName, long fileLength, long openCount, long closeCount,
Counter forwardSmallSeeks, Counter backwardSmallSeeks,
Counter forwardLargeSeeks, Counter backwardLargeSeeks,
Counter contiguousReads, Counter nonContiguousReads,
Counter cachedBytesRead, TimedCounter cachedBytesWritten,
TimedCounter directBytesRead, TimedCounter optimizedBytesRead) {
Counter cachedBytesRead, Counter indexCacheBytesRead,
TimedCounter cachedBytesWritten, TimedCounter directBytesRead, TimedCounter optimizedBytesRead,
Counter blobStoreBytesRequested, long currentIndexCacheFills) {
this.fileName = fileName;
this.fileLength = fileLength;
this.openCount = openCount;
Expand All @@ -155,9 +160,12 @@ public CacheIndexInputStats(String fileName, long fileLength, long openCount, lo
this.contiguousReads = contiguousReads;
this.nonContiguousReads = nonContiguousReads;
this.cachedBytesRead = cachedBytesRead;
this.indexCacheBytesRead = indexCacheBytesRead;
this.cachedBytesWritten = cachedBytesWritten;
this.directBytesRead = directBytesRead;
this.optimizedBytesRead = optimizedBytesRead;
this.blobStoreBytesRequested = blobStoreBytesRequested;
this.currentIndexCacheFills = currentIndexCacheFills;
}

CacheIndexInputStats(final StreamInput in) throws IOException {
Expand All @@ -172,9 +180,21 @@ public CacheIndexInputStats(String fileName, long fileLength, long openCount, lo
this.contiguousReads = new Counter(in);
this.nonContiguousReads = new Counter(in);
this.cachedBytesRead = new Counter(in);
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
this.indexCacheBytesRead = new Counter(in);
} else {
this.indexCacheBytesRead = new Counter(0, 0, 0, 0);
}
this.cachedBytesWritten = new TimedCounter(in);
this.directBytesRead = new TimedCounter(in);
this.optimizedBytesRead = new TimedCounter(in);
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
this.blobStoreBytesRequested = new Counter(in);
this.currentIndexCacheFills = in.readVLong();
} else {
this.blobStoreBytesRequested = new Counter(0, 0, 0, 0);
this.currentIndexCacheFills = 0;
}
}

@Override
Expand All @@ -191,9 +211,16 @@ public void writeTo(StreamOutput out) throws IOException {
contiguousReads.writeTo(out);
nonContiguousReads.writeTo(out);
cachedBytesRead.writeTo(out);
if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
indexCacheBytesRead.writeTo(out);
}
cachedBytesWritten.writeTo(out);
directBytesRead.writeTo(out);
optimizedBytesRead.writeTo(out);
if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
blobStoreBytesRequested.writeTo(out);
out.writeVLong(currentIndexCacheFills);
}
}

public String getFileName() {
Expand Down Expand Up @@ -240,6 +267,10 @@ public Counter getCachedBytesRead() {
return cachedBytesRead;
}

/**
 * Returns the counter held in {@code indexCacheBytesRead}, which is rendered
 * as {@code index_cache_bytes_read} by {@code toXContent}.
 * <p>
 * When this object was deserialized from a stream older than
 * {@code Version.V_8_0_0} the counter is an all-zero placeholder, because
 * such streams do not carry the value.
 *
 * @return the index-cache bytes-read counter
 */
public Counter getIndexCacheBytesRead() {
    return this.indexCacheBytesRead;
}

/**
 * Returns the timed counter held in {@code cachedBytesWritten}, which is
 * rendered as {@code cached_bytes_written} by {@code toXContent}.
 *
 * @return the cached-bytes-written timed counter
 */
public TimedCounter getCachedBytesWritten() {
    return this.cachedBytesWritten;
}
Expand All @@ -252,6 +283,14 @@ public TimedCounter getOptimizedBytesRead() {
return optimizedBytesRead;
}

/**
 * Returns the counter held in {@code blobStoreBytesRequested}, which is
 * rendered as {@code blob_store_bytes_requested} by {@code toXContent}.
 * <p>
 * When this object was deserialized from a stream older than
 * {@code Version.V_8_0_0} the counter is an all-zero placeholder, because
 * such streams do not carry the value.
 *
 * @return the blob-store bytes-requested counter
 */
public Counter getBlobStoreBytesRequested() {
    return this.blobStoreBytesRequested;
}

/**
 * Returns the value held in {@code currentIndexCacheFills}, which is rendered
 * as {@code current_index_cache_fills} by {@code toXContent}.
 * <p>
 * The value is {@code 0} when this object was deserialized from a stream
 * older than {@code Version.V_8_0_0}, because such streams do not carry it.
 *
 * @return the current index cache fills value
 */
public long getCurrentIndexCacheFills() {
    return this.currentIndexCacheFills;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand All @@ -263,6 +302,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field("contiguous_bytes_read", getContiguousReads());
builder.field("non_contiguous_bytes_read", getNonContiguousReads());
builder.field("cached_bytes_read", getCachedBytesRead());
builder.field("index_cache_bytes_read", getIndexCacheBytesRead());
builder.field("cached_bytes_written", getCachedBytesWritten());
builder.field("direct_bytes_read", getDirectBytesRead());
builder.field("optimized_bytes_read", getOptimizedBytesRead());
Expand All @@ -278,6 +318,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field("large", getBackwardLargeSeeks());
builder.endObject();
}
builder.field("blob_store_bytes_requested", getBlobStoreBytesRequested());
builder.field("current_index_cache_fills", getCurrentIndexCacheFills());
}
return builder.endObject();
}
Expand All @@ -302,9 +344,12 @@ public boolean equals(Object other) {
&& Objects.equals(contiguousReads, stats.contiguousReads)
&& Objects.equals(nonContiguousReads, stats.nonContiguousReads)
&& Objects.equals(cachedBytesRead, stats.cachedBytesRead)
&& Objects.equals(indexCacheBytesRead, stats.indexCacheBytesRead)
&& Objects.equals(cachedBytesWritten, stats.cachedBytesWritten)
&& Objects.equals(directBytesRead, stats.directBytesRead)
&& Objects.equals(optimizedBytesRead, stats.optimizedBytesRead);
&& Objects.equals(optimizedBytesRead, stats.optimizedBytesRead)
&& Objects.equals(blobStoreBytesRequested, stats.blobStoreBytesRequested)
&& currentIndexCacheFills == stats.currentIndexCacheFills;
}

@Override
Expand All @@ -313,8 +358,9 @@ public int hashCode() {
forwardSmallSeeks, backwardSmallSeeks,
forwardLargeSeeks, backwardLargeSeeks,
contiguousReads, nonContiguousReads,
cachedBytesRead, cachedBytesWritten,
directBytesRead, optimizedBytesRead);
cachedBytesRead, indexCacheBytesRead,
cachedBytesWritten, directBytesRead, optimizedBytesRead,
blobStoreBytesRequested, currentIndexCacheFills);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@ public static boolean isSearchableSnapshotStore(Settings indexSettings) {

public static final String CACHE_PREWARMING_THREAD_POOL_NAME = "searchable_snapshots_cache_prewarming";
public static final String CACHE_PREWARMING_THREAD_POOL_SETTING = "xpack.searchable_snapshots.cache_prewarming_thread_pool";

/**
 * Name of the snapshot blob cache index. The same literal is listed among the
 * names accepted by {@code isXPackTemplate}.
 * NOTE(review): presumably this is the system index used as the second-level
 * cache for searchable snapshot shards -- confirm against its usages.
 */
public static final String SNAPSHOT_BLOB_CACHE_INDEX = ".snapshot-blob-cache";
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ private CacheIndexInputStats randomCacheIndexInputStats() {
randomCounter(), randomCounter(),
randomCounter(), randomCounter(),
randomCounter(), randomCounter(),
randomCounter(), randomTimedCounter(),
randomTimedCounter(), randomTimedCounter());
randomCounter(), randomCounter(), randomTimedCounter(),
randomTimedCounter(), randomTimedCounter(),
randomCounter(), randomNonNegativeLong());
}

private Counter randomCounter() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ teardown:
- gte: { indices.docs.shards.0.0.files.0.cached_bytes_read.min: 0 }
- gte: { indices.docs.shards.0.0.files.0.cached_bytes_read.max: 0 }

- gte: { indices.docs.shards.0.0.files.0.index_cache_bytes_read.count: 0 }
- gte: { indices.docs.shards.0.0.files.0.index_cache_bytes_read.sum: 0 }
- gte: { indices.docs.shards.0.0.files.0.index_cache_bytes_read.min: 0 }
- gte: { indices.docs.shards.0.0.files.0.index_cache_bytes_read.max: 0 }

- gte: { indices.docs.shards.0.0.files.0.cached_bytes_written.count: 0 }
- gte: { indices.docs.shards.0.0.files.0.cached_bytes_written.sum: 0 }
- gte: { indices.docs.shards.0.0.files.0.cached_bytes_written.min: 0 }
Expand Down Expand Up @@ -203,6 +208,13 @@ teardown:
- gte: { indices.docs.shards.0.0.files.0.backward_seeks.large.min: 0 }
- gte: { indices.docs.shards.0.0.files.0.backward_seeks.large.max: 0 }

- gte: { indices.docs.shards.0.0.files.0.blob_store_bytes_requested.count: 0 }
- gte: { indices.docs.shards.0.0.files.0.blob_store_bytes_requested.sum: 0 }
- gte: { indices.docs.shards.0.0.files.0.blob_store_bytes_requested.min: 0 }
- gte: { indices.docs.shards.0.0.files.0.blob_store_bytes_requested.max: 0 }

- gte: { indices.docs.shards.0.0.files.0.current_index_cache_fills: 0 }

- do:
searchable_snapshots.stats:
index: "d*"
Expand Down
Loading

0 comments on commit a20ff51

Please sign in to comment.