Skip to content

Commit

Permalink
Add metrics around the file extensions we request when populating the…
Browse files Browse the repository at this point in the history
… cache

This adds the file extentions for the blobs we request when populating the
cache.
The possible values for lucene extensions are around 50 and we use a special
"other" category for everything else, as a fallback.
  • Loading branch information
andreidan committed Feb 21, 2025
1 parent 45836c8 commit adf8a21
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.store.LuceneFilesExtensions;
import org.elasticsearch.telemetry.TelemetryProvider;
import org.elasticsearch.telemetry.metric.DoubleHistogram;
import org.elasticsearch.telemetry.metric.LongCounter;
Expand All @@ -24,8 +25,8 @@ public class BlobCacheMetrics {
private static final double BYTES_PER_NANOSECONDS_TO_MEBIBYTES_PER_SECOND = 1e9D / (1 << 20);
public static final String CACHE_POPULATION_REASON_ATTRIBUTE_KEY = "reason";
public static final String CACHE_POPULATION_SOURCE_ATTRIBUTE_KEY = "source";
public static final String SHARD_ID_ATTRIBUTE_KEY = "shard_id";
public static final String INDEX_ATTRIBUTE_KEY = "index_name";
public static final String LUCENE_FILE_EXTENSION_ATTRIBUTE_KEY = "file_extension";
public static final String NON_LUCENE_EXTENSION_TO_RECORD = "other";

private final LongCounter cacheMissCounter;
private final LongCounter evictedCountNonZeroFrequency;
Expand Down Expand Up @@ -113,22 +114,28 @@ public LongHistogram getCacheMissLoadTimes() {
/**
* Record the various cache population metrics after a chunk is copied to the cache
*
* @param blobName The file that was requested and triggered the cache population.
* @param bytesCopied The number of bytes copied
* @param copyTimeNanos The time taken to copy the bytes in nanoseconds
* @param cachePopulationReason The reason for the cache being populated
* @param cachePopulationSource The source from which the data is being loaded
*/
public void recordCachePopulationMetrics(
String blobName,
int bytesCopied,
long copyTimeNanos,
CachePopulationReason cachePopulationReason,
CachePopulationSource cachePopulationSource
) {
LuceneFilesExtensions luceneFilesExtensions = LuceneFilesExtensions.fromFile(blobName);
String blobFileExtension = luceneFilesExtensions != null ? luceneFilesExtensions.getExtension() : NON_LUCENE_EXTENSION_TO_RECORD;
Map<String, Object> metricAttributes = Map.of(
CACHE_POPULATION_REASON_ATTRIBUTE_KEY,
cachePopulationReason.name(),
CACHE_POPULATION_SOURCE_ATTRIBUTE_KEY,
cachePopulationSource.name()
cachePopulationSource.name(),
LUCENE_FILE_EXTENSION_ATTRIBUTE_KEY,
blobFileExtension
);
assert bytesCopied > 0 : "We shouldn't be recording zero-sized copies";
cachePopulationBytes.incrementBy(bytesCopied, metricAttributes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@
package org.elasticsearch.blobcache;

import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.index.store.LuceneFilesExtensions;
import org.elasticsearch.telemetry.InstrumentType;
import org.elasticsearch.telemetry.Measurement;
import org.elasticsearch.telemetry.RecordingMeterRegistry;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;

import java.util.Arrays;
import java.util.concurrent.TimeUnit;

import static org.hamcrest.Matchers.is;

public class BlobCacheMetricsTests extends ESTestCase {

private RecordingMeterRegistry recordingMeterRegistry;
Expand All @@ -32,7 +36,10 @@ public void testRecordCachePopulationMetricsRecordsThroughput() {
int secondsTaken = randomIntBetween(1, 5);
BlobCacheMetrics.CachePopulationReason cachePopulationReason = randomFrom(BlobCacheMetrics.CachePopulationReason.values());
CachePopulationSource cachePopulationSource = randomFrom(CachePopulationSource.values());
String fileExtension = randomFrom(Arrays.stream(LuceneFilesExtensions.values()).map(LuceneFilesExtensions::getExtension).toList());
String luceneBlobFile = randomAlphanumericOfLength(15) + "." + fileExtension;
metrics.recordCachePopulationMetrics(
luceneBlobFile,
Math.toIntExact(ByteSizeValue.ofMb(mebiBytesSent).getBytes()),
TimeUnit.SECONDS.toNanos(secondsTaken),
cachePopulationReason,
Expand All @@ -44,29 +51,31 @@ public void testRecordCachePopulationMetricsRecordsThroughput() {
.getMeasurements(InstrumentType.DOUBLE_HISTOGRAM, "es.blob_cache.population.throughput.histogram")
.get(0);
assertEquals(throughputMeasurement.getDouble(), (double) mebiBytesSent / secondsTaken, 0.0);
assertExpectedAttributesPresent(throughputMeasurement, cachePopulationReason, cachePopulationSource);
assertExpectedAttributesPresent(throughputMeasurement, cachePopulationReason, cachePopulationSource, fileExtension);

// bytes counter
Measurement totalBytesMeasurement = recordingMeterRegistry.getRecorder()
.getMeasurements(InstrumentType.LONG_COUNTER, "es.blob_cache.population.bytes.total")
.get(0);
assertEquals(totalBytesMeasurement.getLong(), ByteSizeValue.ofMb(mebiBytesSent).getBytes());
assertExpectedAttributesPresent(totalBytesMeasurement, cachePopulationReason, cachePopulationSource);
assertExpectedAttributesPresent(totalBytesMeasurement, cachePopulationReason, cachePopulationSource, fileExtension);

// time counter
Measurement totalTimeMeasurement = recordingMeterRegistry.getRecorder()
.getMeasurements(InstrumentType.LONG_COUNTER, "es.blob_cache.population.time.total")
.get(0);
assertEquals(totalTimeMeasurement.getLong(), TimeUnit.SECONDS.toMillis(secondsTaken));
assertExpectedAttributesPresent(totalTimeMeasurement, cachePopulationReason, cachePopulationSource);
assertExpectedAttributesPresent(totalTimeMeasurement, cachePopulationReason, cachePopulationSource, fileExtension);
}

private static void assertExpectedAttributesPresent(
Measurement measurement,
BlobCacheMetrics.CachePopulationReason cachePopulationReason,
CachePopulationSource cachePopulationSource
CachePopulationSource cachePopulationSource,
String fileExtension
) {
assertEquals(measurement.attributes().get(BlobCacheMetrics.CACHE_POPULATION_REASON_ATTRIBUTE_KEY), cachePopulationReason.name());
assertEquals(measurement.attributes().get(BlobCacheMetrics.CACHE_POPULATION_SOURCE_ATTRIBUTE_KEY), cachePopulationSource.name());
assertThat(measurement.attributes().get(BlobCacheMetrics.CACHE_POPULATION_REASON_ATTRIBUTE_KEY), is(cachePopulationReason.name()));
assertThat(measurement.attributes().get(BlobCacheMetrics.CACHE_POPULATION_SOURCE_ATTRIBUTE_KEY), is(cachePopulationSource.name()));
assertThat(measurement.attributes().get(BlobCacheMetrics.LUCENE_FILE_EXTENSION_ATTRIBUTE_KEY), is(fileExtension));
}
}

0 comments on commit adf8a21

Please sign in to comment.