From cfb992c1e2871cbfd72e64258856a78dcbd98585 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Thu, 14 Nov 2024 11:21:56 -0800 Subject: [PATCH 01/19] otel integration to venice-router --- build.gradle | 5 + .../AbstractVeniceAggVersionedStats.java | 6 +- .../stats/AggHostLevelIngestionStats.java | 4 +- .../stats/AggKafkaConsumerServiceStats.java | 4 +- .../davinci/stats/VeniceVersionedStats.java | 4 +- .../stats/VeniceVersionedStatsReporter.java | 4 +- .../heartbeat/HeartbeatVersionedStats.java | 4 +- internal/venice-client-common/build.gradle | 5 + .../com/linkedin/venice/read/RequestType.java | 13 +- .../venice/stats/AbstractVeniceAggStats.java | 61 +++- ...va => StatsSupplierMetricsRepository.java} | 2 +- .../StatsSupplierVeniceMetricsRepository.java | 19 ++ .../linkedin/venice/stats/TehutiUtils.java | 14 + .../venice/stats/VeniceMetricsConfig.java | 235 ++++++++++++++ .../venice/stats/VeniceMetricsRepository.java | 192 +++++++++++ .../VeniceOpenTelemetryConfigProperties.java | 284 +++++++++++++++++ .../VeniceOpenTelemetryMetricFormat.java | 34 ++ .../VeniceOpenTelemetryMetricsRepository.java | 297 ++++++++++++++++++ .../venice/utils/VeniceProperties.java | 4 + .../utils/metrics/MetricsRepositoryUtils.java | 19 ++ .../venice/stats/VeniceMetricsConfigTest.java | 107 +++++++ .../stats/VeniceMetricsRepositoryTest.java | 130 ++++++++ ...iceOpenTelemetryMetricsRepositoryTest.java | 141 +++++++++ .../stats/AbstractVeniceAggStoreStats.java | 7 +- .../VeniceHttpResponseStatusCodeCategory.java | 108 +++++++ .../dimensions/VeniceMetricsDimensions.java | 48 +++ .../VeniceRequestRetryAbortReason.java | 17 + .../dimensions/VeniceRequestRetryType.java | 15 + .../VeniceRequestValidationOutcome.java | 15 + .../VeniceResponseStatusCategory.java | 15 + ...iceHttpResponseStatusCodeCategoryTest.java | 56 ++++ .../VeniceMetricsDimensionsTest.java | 123 ++++++++ .../VeniceRequestRetryAbortReasonTest.java | 30 ++ .../VeniceRequestRetryTypeTest.java | 24 ++ 
.../VeniceRequestValidationOutcomeTest.java | 24 ++ .../VeniceResponseStatusCategoryTest.java | 33 ++ .../utils/VeniceRouterWrapper.java | 15 +- .../router/api/TestVeniceDispatcher.java | 4 +- services/venice-router/build.gradle | 4 + .../linkedin/venice/router/RouterServer.java | 49 ++- .../api/RouterExceptionAndTrackingUtils.java | 6 +- .../venice/router/api/VeniceDispatcher.java | 4 +- .../venice/router/api/VenicePathParser.java | 6 +- .../router/api/VeniceResponseAggregator.java | 19 +- .../router/api/VeniceVersionFinder.java | 6 +- .../api/routing/helix/HelixGroupSelector.java | 4 +- .../ApacheHttpAsyncStorageNodeClient.java | 4 +- .../router/stats/AdminOperationsStats.java | 4 +- .../router/stats/AggHostHealthStats.java | 11 +- .../stats/AggRouterHttpRequestStats.java | 79 +++-- .../venice/router/stats/HealthCheckStats.java | 4 +- .../venice/router/stats/HelixGroupStats.java | 4 +- .../venice/router/stats/HostHealthStats.java | 4 +- .../router/stats/RouteHttpRequestStats.java | 8 +- .../venice/router/stats/RouteHttpStats.java | 8 +- .../stats/RouterCurrentVersionStats.java | 4 +- .../router/stats/RouterHttpRequestStats.java | 272 +++++++++++++--- .../router/stats/RouterThrottleStats.java | 4 +- .../venice/router/stats/SecurityStats.java | 4 +- .../router/stats/StaleVersionStats.java | 4 +- .../router/AggRouterHttpRequestStatsTest.java | 29 +- .../router/RouteHttpRequestStatsTest.java | 15 +- .../router/api/TestVeniceDelegateMode.java | 10 +- .../router/api/TestVenicePathParser.java | 15 +- .../api/TestVeniceResponseAggregator.java | 2 +- .../router/api/TestVeniceVersionFinder.java | 18 +- .../api/path/TestVeniceMultiGetPath.java | 7 +- .../router/api/path/TestVenicePath.java | 6 +- .../stats/AdminOperationsStatsTest.java | 4 +- .../stats/AggServerHttpRequestStats.java | 2 +- 70 files changed, 2525 insertions(+), 217 deletions(-) rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/{StatsSupplier.java => 
StatsSupplierMetricsRepository.java} (89%) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java create mode 100644 
internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java diff --git a/build.gradle b/build.gradle index f66f3dc852..b2082801c2 100644 --- a/build.gradle +++ b/build.gradle @@ -141,6 +141,11 @@ ext.libraries = [ zkclient: 'com.101tec:zkclient:0.7', // For Kafka AdminUtils zookeeper: 'org.apache.zookeeper:zookeeper:3.6.3', zstd: 'com.github.luben:zstd-jni:1.5.2-3', + opentelemetryApi: "io.opentelemetry:opentelemetry-api:1.43.0", + opentelemetrySdk: "io.opentelemetry:opentelemetry-sdk:1.43.0", + opentelemetryExporterLogging: "io.opentelemetry:opentelemetry-exporter-logging:1.43.0", + opentelemetryExporterOtlp: "io.opentelemetry:opentelemetry-exporter-otlp:1.43.0", + opentelemetryExporterCommon: "io.opentelemetry:opentelemetry-exporter-common:1.43.0" ] group = 'com.linkedin.venice' diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java index 417da42f60..8bbd78c2bc 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java @@ -7,7 +7,7 @@ import 
com.linkedin.venice.meta.StoreDataChangedListener; import com.linkedin.venice.meta.Version; import com.linkedin.venice.meta.VersionStatus; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.tehuti.metrics.MetricsRepository; @@ -25,7 +25,7 @@ public abstract class AbstractVeniceAggVersionedStats statsInitiator; - private final StatsSupplier reporterSupplier; + private final StatsSupplierMetricsRepository reporterSupplier; protected final ReadOnlyStoreRepository metadataRepository; private final MetricsRepository metricsRepository; @@ -37,7 +37,7 @@ public AbstractVeniceAggVersionedStats( MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, Supplier statsInitiator, - StatsSupplier reporterSupplier, + StatsSupplierMetricsRepository reporterSupplier, boolean unregisterMetricForDeletedStoreEnabled) { this.metadataRepository = metadataRepository; this.metricsRepository = metricsRepository; diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java index 463201f2b5..3ea802cb66 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java @@ -5,7 +5,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import com.linkedin.venice.utils.Time; import io.tehuti.metrics.MetricsRepository; import java.util.Map; @@ -29,7 +29,7 @@ public 
AggHostLevelIngestionStats( unregisterMetricForDeletedStoreEnabled); } - static class HostLevelStoreIngestionStatsSupplier implements StatsSupplier { + static class HostLevelStoreIngestionStatsSupplier implements StatsSupplierMetricsRepository { private final VeniceServerConfig serverConfig; private final Map ingestionTaskMap; private final Time time; diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java index a0756ba318..d4e6055154 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java @@ -3,7 +3,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import com.linkedin.venice.utils.SystemTime; import io.tehuti.metrics.MetricsRepository; import java.util.function.LongSupplier; @@ -99,7 +99,7 @@ public void recordTotalLatestOffsetIsPresent() { totalStats.recordLatestOffsetIsPresent(); } - static class KafkaConsumerServiceStatsSupplier implements StatsSupplier { + static class KafkaConsumerServiceStatsSupplier implements StatsSupplierMetricsRepository { private final LongSupplier getMaxElapsedTimeSinceLastPollInConsumerPool; KafkaConsumerServiceStatsSupplier(LongSupplier getMaxElapsedTimeSinceLastPollInConsumerPool) { diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java index ab2e88f53e..fcb1c24aa1 100644 --- 
a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java @@ -1,6 +1,6 @@ package com.linkedin.davinci.stats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import io.tehuti.metrics.MetricsRepository; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; @@ -25,7 +25,7 @@ public VeniceVersionedStats( MetricsRepository metricsRepository, String storeName, Supplier statsInitiator, - StatsSupplier reporterSupplier) { + StatsSupplierMetricsRepository reporterSupplier) { this.storeName = storeName; this.versionedStats = new Int2ObjectOpenHashMap<>(); this.reporters = new VeniceVersionedStatsReporter<>(metricsRepository, storeName, reporterSupplier); diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java index dd112be658..4753733881 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java @@ -4,7 +4,7 @@ import com.linkedin.venice.common.VeniceSystemStoreUtils; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; @@ -22,7 +22,7 @@ public class VeniceVersionedStatsReporter statsSupplier) { + StatsSupplierMetricsRepository statsSupplier) { super(metricsRepository, storeName); this.isSystemStore = VeniceSystemStoreUtils.isSystemStore(storeName); diff --git 
a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java index 7f3220c000..a02bcd883f 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java @@ -3,7 +3,7 @@ import com.linkedin.davinci.stats.AbstractVeniceAggVersionedStats; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.meta.Store; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import io.tehuti.metrics.MetricsRepository; import java.util.Map; import java.util.function.Supplier; @@ -17,7 +17,7 @@ public HeartbeatVersionedStats( MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, Supplier statsInitiator, - StatsSupplier reporterSupplier, + StatsSupplierMetricsRepository reporterSupplier, Map>>> leaderMonitors, Map>>> followerMonitors) { super(metricsRepository, metadataRepository, statsInitiator, reporterSupplier, true); diff --git a/internal/venice-client-common/build.gradle b/internal/venice-client-common/build.gradle index 3ac9a15dc7..9d62bb9260 100644 --- a/internal/venice-client-common/build.gradle +++ b/internal/venice-client-common/build.gradle @@ -39,6 +39,11 @@ dependencies { implementation libraries.log4j2api implementation libraries.zstd implementation libraries.conscrypt + implementation libraries.opentelemetryApi + implementation libraries.opentelemetrySdk + implementation libraries.opentelemetryExporterLogging + implementation libraries.opentelemetryExporterOtlp + implementation libraries.opentelemetryExporterCommon testImplementation project(':internal:venice-test-common') testImplementation 
project(':clients:venice-thin-client') diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java index b426ee1aab..b734b017ab 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java @@ -1,16 +1,23 @@ package com.linkedin.venice.read; public enum RequestType { - SINGLE_GET(""), MULTI_GET("multiget_"), MULTI_GET_STREAMING("multiget_streaming_"), COMPUTE("compute_"), - COMPUTE_STREAMING("compute_streaming_"); + SINGLE_GET("", "single_get"), MULTI_GET("multiget_", "multi_get"), + MULTI_GET_STREAMING("multiget_streaming_", "multi_get_streaming"), COMPUTE("compute_", "compute"), + COMPUTE_STREAMING("compute_streaming_", "compute_streaming"); private String metricPrefix; + private String requestTypeName; - RequestType(String metricPrefix) { + RequestType(String metricPrefix, String requestTypeName) { this.metricPrefix = metricPrefix; + this.requestTypeName = requestTypeName; } public String getMetricPrefix() { return this.metricPrefix; } + + public String getRequestTypeName() { + return this.requestTypeName; + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java index 03a87a4bfe..9255f18731 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java @@ -10,40 +10,81 @@ public abstract class AbstractVeniceAggStats { protected T totalStats; protected final Map storeStats = new VeniceConcurrentHashMap<>(); - private StatsSupplier statsFactory; + private StatsSupplierMetricsRepository 
statsFactoryMetricsRepository; + private StatsSupplierVeniceMetricsRepository statsFactoryVeniceMetricsRepository; + private final MetricsRepository metricsRepository; + private String clusterName = null; - private AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier, T totalStats) { + private AbstractVeniceAggStats( + MetricsRepository metricsRepository, + StatsSupplierMetricsRepository statsSupplier, + T totalStats) { + this.metricsRepository = metricsRepository; + this.statsFactoryMetricsRepository = statsSupplier; + this.totalStats = totalStats; + } + + private AbstractVeniceAggStats( + VeniceMetricsRepository metricsRepository, + StatsSupplierVeniceMetricsRepository statsSupplier, + String clusterName, + T totalStats) { this.metricsRepository = metricsRepository; - this.statsFactory = statsSupplier; + this.statsFactoryVeniceMetricsRepository = statsSupplier; + this.clusterName = clusterName; this.totalStats = totalStats; } - public AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier) { + public AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplierMetricsRepository statsSupplier) { this(metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null)); } - public AbstractVeniceAggStats(MetricsRepository metricsRepository) { + public AbstractVeniceAggStats( + StatsSupplierVeniceMetricsRepository statsSupplier, + VeniceMetricsRepository metricsRepository, + String clusterName) { + this( + metricsRepository, + statsSupplier, + clusterName, + statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null)); + } + + public AbstractVeniceAggStats(MetricsRepository metricsRepository, String clusterName) { this.metricsRepository = metricsRepository; + this.clusterName = clusterName; } - public void setStatsSupplier(StatsSupplier statsSupplier) { - this.statsFactory = statsSupplier; - this.totalStats = 
statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null); + public void setStatsSupplier(StatsSupplierVeniceMetricsRepository statsSupplier) { + this.statsFactoryVeniceMetricsRepository = statsSupplier; + if (metricsRepository instanceof VeniceMetricsRepository) { + this.totalStats = + statsSupplier.get((VeniceMetricsRepository) metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null); + } } public AbstractVeniceAggStats( String clusterName, MetricsRepository metricsRepository, - StatsSupplier statsSupplier) { + StatsSupplierMetricsRepository statsSupplier) { this( metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT + "." + clusterName, null)); + this.clusterName = clusterName; } public T getStoreStats(String storeName) { - return storeStats.computeIfAbsent(storeName, k -> statsFactory.get(metricsRepository, storeName, totalStats)); + if (metricsRepository instanceof VeniceMetricsRepository) { + return storeStats.computeIfAbsent( + storeName, + k -> statsFactoryVeniceMetricsRepository + .get((VeniceMetricsRepository) metricsRepository, storeName, clusterName, totalStats)); + } else { + return storeStats + .computeIfAbsent(storeName, k -> statsFactoryMetricsRepository.get(metricsRepository, storeName, totalStats)); + } } public T getNullableStoreStats(String storeName) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java similarity index 89% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java index 9967d93c3f..208fc4470e 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java +++ 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java @@ -3,7 +3,7 @@ import io.tehuti.metrics.MetricsRepository; -public interface StatsSupplier { +public interface StatsSupplierMetricsRepository { /** * Legacy function, for implementations that do not use total stats in their constructor. * diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java new file mode 100644 index 0000000000..c604515b89 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java @@ -0,0 +1,19 @@ +package com.linkedin.venice.stats; + +/** copy of {@link StatsSupplierMetricsRepository} for {@link VeniceMetricsRepository} */ +public interface StatsSupplierVeniceMetricsRepository { + /** + * Legacy function, for implementations that do not use total stats in their constructor. + * + * @see #get(VeniceMetricsRepository, String, String, AbstractVeniceStats) which is the only caller. + */ + T get(VeniceMetricsRepository metricsRepository, String storeName, String clusterName); + + /** + * This is the function that gets called by {@link AbstractVeniceAggStats}, and concrete classes can + * optionally implement it in order to be provided with the total stats instance. 
+ */ + default T get(VeniceMetricsRepository metricsRepository, String storeName, String clusterName, T totalStats) { + return get(metricsRepository, storeName, clusterName); + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java index 7a5cf212cd..7ef5a7bdf5 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java @@ -9,6 +9,7 @@ import io.tehuti.metrics.stats.Percentiles; import io.tehuti.metrics.stats.Rate; import java.util.Arrays; +import java.util.Map; /** @@ -129,6 +130,19 @@ public static MetricsRepository getMetricsRepository(String serviceName) { return metricsRepository; } + public static VeniceMetricsRepository getVeniceMetricsRepository( + String serviceName, + String metricPrefix, + Map configs) { + VeniceMetricsRepository metricsRepository = new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName(serviceName) + .setMetricPrefix(metricPrefix) + .extractAndSetOtelConfigs(configs) + .build()); + metricsRepository.addReporter(new JmxReporter(serviceName)); + return metricsRepository; + } + /** * A valid metric name needs to pass the test in {@link javax.management.ObjectName}. 
This helper function will * try to fix all invalid character mentioned in the above function to avoid MalformedObjectNameException; besides, diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java new file mode 100644 index 0000000000..586143f276 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -0,0 +1,235 @@ +package com.linkedin.venice.stats; + +import com.linkedin.venice.exceptions.VeniceException; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.tehuti.metrics.MetricConfig; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + + +public class VeniceMetricsConfig { + private static final Logger LOGGER = LogManager.getLogger(VeniceMetricsConfig.class); + private final String serviceName; + private final String metricPrefix; + /** config to control whether to emit OpenTelemetry or tehuti metrics or both + * emitTehutiMetrics is not used for now */ + private final boolean emitOpenTelemetryMetrics; + private final boolean emitTehutiMetrics; + + /** extra configs for OpenTelemetry. Supports 2 exporter currently
+ * 1. {@link MetricExporter} for exporting to an HTTP/gRPC endpoint. Further details are set via configs; + * see {@link VeniceMetricsConfigBuilder#extractAndSetOtelConfigs} and {@link VeniceOpenTelemetryMetricsRepository#getOtlpHttpMetricExporter}
+ * 2. {@link VeniceOpenTelemetryMetricsRepository.LogBasedMetricExporter} for debug purposes + */ + private final Map otelConfigs; + private final boolean emitToHttpGrpcEndpoint; + private final boolean emitToLog; // for debug purposes + private final VeniceOpenTelemetryMetricFormat metricFormat; + private final boolean useExponentialHistogram; + private final int exponentialHistogramMaxScale; + private final int exponentialHistogramMaxBuckets; + + /** reusing tehuti's MetricConfig */ + private final MetricConfig tehutiMetricConfig; + + private VeniceMetricsConfig(VeniceMetricsConfigBuilder veniceMetricsConfigBuilder) { + this.serviceName = veniceMetricsConfigBuilder.serviceName; + this.metricPrefix = veniceMetricsConfigBuilder.metricPrefix; + this.emitOpenTelemetryMetrics = veniceMetricsConfigBuilder.emitOpenTelemetryMetrics; + this.emitTehutiMetrics = veniceMetricsConfigBuilder.emitTehutiMetrics; + this.emitToHttpGrpcEndpoint = veniceMetricsConfigBuilder.emitToHttpGrpcEndpoint; + this.emitToLog = veniceMetricsConfigBuilder.emitToLog; + this.metricFormat = veniceMetricsConfigBuilder.metricFormat; + this.useExponentialHistogram = veniceMetricsConfigBuilder.useExponentialHistogram; + this.exponentialHistogramMaxScale = veniceMetricsConfigBuilder.exponentialHistogramMaxScale; + this.exponentialHistogramMaxBuckets = veniceMetricsConfigBuilder.exponentialHistogramMaxBuckets; + this.otelConfigs = veniceMetricsConfigBuilder.otelConfigs; + this.tehutiMetricConfig = veniceMetricsConfigBuilder.tehutiMetricConfig; + } + + public static class VeniceMetricsConfigBuilder { + private String serviceName = "noop_service"; + private String metricPrefix = null; + private boolean emitOpenTelemetryMetrics = false; + private boolean emitTehutiMetrics = true; + private boolean emitToHttpGrpcEndpoint = false; + private boolean emitToLog = false; + private VeniceOpenTelemetryMetricFormat metricFormat = VeniceOpenTelemetryMetricFormat.SNAKE_CASE; + private boolean useExponentialHistogram 
= true; + private int exponentialHistogramMaxScale = 3; + private int exponentialHistogramMaxBuckets = 250; + private Map otelConfigs = new HashMap<>(); + private MetricConfig tehutiMetricConfig = null; + + public VeniceMetricsConfigBuilder setServiceName(String serviceName) { + this.serviceName = serviceName; + return this; + } + + public VeniceMetricsConfigBuilder setMetricPrefix(String metricPrefix) { + this.metricPrefix = metricPrefix; + return this; + } + + public VeniceMetricsConfigBuilder setEmitOpenTelemetryMetrics(boolean emitOpenTelemetryMetrics) { + this.emitOpenTelemetryMetrics = emitOpenTelemetryMetrics; + return this; + } + + public VeniceMetricsConfigBuilder setEmitTehutiMetrics(boolean emitTehutiMetrics) { + this.emitTehutiMetrics = emitTehutiMetrics; + return this; + } + + public VeniceMetricsConfigBuilder setEmitToHttpGrpcEndpoint(boolean emitToHttpGrpcEndpoint) { + this.emitToHttpGrpcEndpoint = emitToHttpGrpcEndpoint; + return this; + } + + public VeniceMetricsConfigBuilder setEmitToLog(boolean emitToLog) { + this.emitToLog = emitToLog; + return this; + } + + public VeniceMetricsConfigBuilder setMetricFormat(VeniceOpenTelemetryMetricFormat metricFormat) { + this.metricFormat = metricFormat; + return this; + } + + public VeniceMetricsConfigBuilder extractAndSetOtelConfigs(Map configs) { + // copy only OpenTelemetry related configs + for (Map.Entry entry: configs.entrySet()) { + if (entry.getKey().startsWith("otel.")) { + otelConfigs.put(entry.getKey(), entry.getValue()); + } + } + LOGGER.info("OpenTelemetry configs: {}", otelConfigs); + return this; + } + + public VeniceMetricsConfigBuilder setTehutiMetricConfig(MetricConfig tehutiMetricConfig) { + this.tehutiMetricConfig = tehutiMetricConfig; + return this; + } + + /** get the last part of the service name + * For instance: if service name is "venice-router", return "router" + */ + public static String getMetricsPrefix(String input) { + String[] parts = input.split("[\\-\\._]"); + String lastPart 
= parts[parts.length - 1]; + return lastPart; + } + + // Validate required fields before building + private void checkAndSetDefaults() { + if (tehutiMetricConfig == null) { + setTehutiMetricConfig(new MetricConfig()); + } + if (metricPrefix == null) { + setMetricPrefix(getMetricsPrefix(serviceName)); + } + if (otelConfigs.containsKey("otel.venice.enabled")) { + String status = otelConfigs.get("otel.venice.enabled"); + if (status != null) { + setEmitOpenTelemetryMetrics(status.toLowerCase(Locale.ROOT).equals("true")); + } + } + // check otelConfigs and set defaults + if (emitOpenTelemetryMetrics) { + if (otelConfigs.containsKey("otel.venice.export.to.log")) { + String emitStatus = otelConfigs.get("otel.venice.export.to.log"); + if (emitStatus != null) { + setEmitToLog(emitStatus.toLowerCase(Locale.ROOT).equals("true")); + } + } + if (otelConfigs.containsKey("otel.venice.export.to.http.grpc.endpoint")) { + String emitStatus = otelConfigs.get("otel.venice.export.to.http.grpc.endpoint"); + if (emitStatus != null) { + setEmitToHttpGrpcEndpoint(emitStatus.toLowerCase(Locale.ROOT).equals("true")); + } + } + if (otelConfigs.containsKey("otel.venice.metrics.format")) { + String format = otelConfigs.get("otel.venice.metrics.format"); + if (format != null) { + try { + setMetricFormat(VeniceOpenTelemetryMetricFormat.valueOf(format.toUpperCase(Locale.ROOT))); + } catch (IllegalArgumentException e) { + LOGGER.warn("Invalid metric format: {}, setting to default: {}", format, metricFormat); + } + } + } + if (emitToHttpGrpcEndpoint) { + if (!otelConfigs.containsKey("otel.exporter.otlp.metrics.protocol") + || !otelConfigs.containsKey("otel.exporter.otlp.metrics.endpoint")) { + throw new VeniceException( + "otel settings missing for otel.exporter.otlp.metrics.protocol and otel.exporter.otlp.metrics.endpoint"); + } + } + } + } + + public VeniceMetricsConfig build() { + checkAndSetDefaults(); + return new VeniceMetricsConfig(this); + } + } + + // all getters + public String 
getServiceName() { + return this.serviceName; + } + + public String getMetricPrefix() { + return this.metricPrefix; + } + + public boolean isEmitOpenTelemetryMetrics() { + return emitOpenTelemetryMetrics; + } + + public boolean isEmitToHttpGrpcEndpoint() { + return emitToHttpGrpcEndpoint; + } + + public boolean isEmitToLog() { + return emitToLog; + } + + public VeniceOpenTelemetryMetricFormat getMetricFormat() { + return metricFormat; + } + + public boolean isUseExponentialHistogram() { + return useExponentialHistogram; + } + + public int getExponentialHistogramMaxScale() { + return exponentialHistogramMaxScale; + } + + public int getExponentialHistogramMaxBuckets() { + return exponentialHistogramMaxBuckets; + } + + public Map getOtelConfigs() { + return otelConfigs; + } + + public MetricConfig getTehutiMetricConfig() { + return tehutiMetricConfig; + } + + @Override + public String toString() { + return "VeniceMetricsConfig{" + "serviceName='" + serviceName + '\'' + ", metricPrefix='" + metricPrefix + '\'' + + ", emitOpenTelemetryMetrics=" + emitOpenTelemetryMetrics + ", emitTehutiMetrics=" + emitTehutiMetrics + + ", otelConfigs=" + otelConfigs + ", emitToHttpGrpcEndpoint=" + emitToHttpGrpcEndpoint + ", emitToLog=" + + emitToLog + ", metricFormat=" + metricFormat + ", useExponentialHistogram=" + useExponentialHistogram + + ", exponentialHistogramMaxScale=" + exponentialHistogramMaxScale + ", exponentialHistogramMaxBuckets=" + + exponentialHistogramMaxBuckets + ", tehutiMetricConfig=" + tehutiMetricConfig + '}'; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java new file mode 100644 index 0000000000..18110e69be --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -0,0 +1,192 @@ +package com.linkedin.venice.stats; + +import 
io.tehuti.Metric; +import io.tehuti.metrics.Measurable; +import io.tehuti.metrics.MetricConfig; +import io.tehuti.metrics.MetricsReporter; +import io.tehuti.metrics.MetricsRepository; +import io.tehuti.metrics.Sensor; +import io.tehuti.metrics.stats.AsyncGauge; +import java.io.Closeable; +import java.util.Map; +import java.util.Objects; + + +/** extends MetricsRepository to keep the changes to a minimum. Next step would be to create a MetricsRepository inside rather than extending it */ +public class VeniceMetricsRepository extends MetricsRepository implements Closeable { + private MetricsRepository delegate = null; + private VeniceMetricsConfig veniceMetricsConfig; + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; + + public VeniceMetricsRepository() { + super(); + this.veniceMetricsConfig = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + } + + public VeniceMetricsRepository(VeniceMetricsConfig veniceMetricsConfig) { + super(veniceMetricsConfig.getTehutiMetricConfig()); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + } + + public VeniceMetricsRepository( + VeniceMetricsConfig veniceMetricsConfig, + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository) { + super(veniceMetricsConfig.getTehutiMetricConfig()); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = openTelemetryMetricsRepository; + } + + /** if MetricsRepository is passed in, then use it as the delegate, can be removed after the migration */ + public VeniceMetricsRepository(MetricsRepository metricsRepository, VeniceMetricsConfig veniceMetricsConfig) { + this.delegate = Objects.requireNonNull(metricsRepository); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = new 
VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + } + + public VeniceMetricsRepository( + MetricsRepository metricsRepository, + VeniceMetricsConfig veniceMetricsConfig, + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository) { + this.delegate = Objects.requireNonNull(metricsRepository); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = openTelemetryMetricsRepository; + } + + public VeniceOpenTelemetryMetricsRepository getOpenTelemetryMetricsRepository() { + return this.openTelemetryMetricsRepository; + } + + public VeniceMetricsConfig getVeniceMetricsConfig() { + return veniceMetricsConfig; + } + + @Override + public void close() { + if (delegate != null) { + delegate.close(); + } else { + super.close(); + } + openTelemetryMetricsRepository.close(); + } + + // all other overrides from MetricsRepository to use delegate + @Override + public Sensor getSensor(String name) { + if (delegate != null) { + return delegate.getSensor(name); + } else { + return super.getSensor(name); + } + } + + @Override + public Sensor sensor(String name) { + if (delegate != null) { + return delegate.sensor(name); + } else { + return super.sensor(name); + } + } + + @Override + public Sensor sensor(String name, Sensor... parents) { + if (delegate != null) { + return delegate.sensor(name, parents); + } else { + return super.sensor(name, parents); + } + } + + @Override + public synchronized Sensor sensor(String name, MetricConfig config, Sensor... 
parents) { + if (delegate != null) { + return delegate.sensor(name, config, parents); + } else { + return super.sensor(name, config, parents); + } + } + + @Override + public synchronized void removeSensor(String name) { + if (delegate != null) { + delegate.removeSensor(name); + } else { + super.removeSensor(name); + } + } + + @Override + public Metric addMetric(String name, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, measurable); + } else { + return super.addMetric(name, measurable); + } + } + + @Override + public Metric addMetric(String name, String description, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, description, measurable); + } else { + return super.addMetric(name, description, measurable); + } + } + + @Override + public Metric addMetric(String name, MetricConfig config, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, config, measurable); + } else { + return super.addMetric(name, config, measurable); + } + } + + @Override + public synchronized Metric addMetric(String name, String description, MetricConfig config, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, description, config, measurable); + } else { + return super.addMetric(name, description, config, measurable); + } + } + + @Override + public synchronized void addReporter(MetricsReporter reporter) { + if (delegate != null) { + delegate.addReporter(reporter); + } else { + super.addReporter(reporter); + } + } + + @Override + public Map metrics() { + if (delegate != null) { + return delegate.metrics(); + } else { + return super.metrics(); + } + } + + @Override + public Metric getMetric(String name) { + if (delegate != null) { + return delegate.getMetric(name); + } else { + return super.getMetric(name); + } + } + + @Override + public AsyncGauge.AsyncGaugeExecutor getAsyncGaugeExecutor() { + if (delegate != null) { + return 
delegate.getAsyncGaugeExecutor(); + } else { + return super.getAsyncGaugeExecutor(); + } + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java new file mode 100644 index 0000000000..bc3e06c19d --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java @@ -0,0 +1,284 @@ +package com.linkedin.venice.stats; + +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.joining; + +import io.opentelemetry.api.internal.ConfigUtil; +import io.opentelemetry.api.internal.StringUtils; +import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties; +import io.opentelemetry.sdk.autoconfigure.spi.ConfigurationException; +import java.time.Duration; +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + + +/** + * copy of {@link io.opentelemetry.sdk.autoconfigure.spi.internal.DefaultConfigProperties} with + * modification to {@link #getList} to support comma separated values for 1 key. + * + * In original class: comma separated values will be considered as different key values + * In new class: comma separated values will be considered as 1 key value + * + * Copied the entire class DefaultConfigProperties rather than extending it because it is final class. 
+ */ +public class VeniceOpenTelemetryConfigProperties implements ConfigProperties { + /** + * Properties are normalized to The properties for both of these will be normalized to be all lower + * case, dashses are replaces with periods, and environment variable underscores are replaces with + * periods. + * + *

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ + + private final Map config; + + /** + * Creates a {@link VeniceOpenTelemetryConfigProperties} by merging system properties, environment variables, + * and the {@code defaultProperties}. + * + *

Environment variables take priority over {@code defaultProperties}. System properties take + * priority over environment variables. + */ + public static VeniceOpenTelemetryConfigProperties create(Map defaultProperties) { + return new VeniceOpenTelemetryConfigProperties(System.getProperties(), System.getenv(), defaultProperties); + } + + /** + * Create a {@link VeniceOpenTelemetryConfigProperties} from the {@code properties}, ignoring system + * properties and environment variables. + */ + public static VeniceOpenTelemetryConfigProperties createFromMap(Map properties) { + return new VeniceOpenTelemetryConfigProperties(properties, Collections.emptyMap(), Collections.emptyMap()); + } + + private VeniceOpenTelemetryConfigProperties( + Map systemProperties, + Map environmentVariables, + Map defaultProperties) { + Map config = new HashMap<>(); + defaultProperties.forEach((name, value) -> config.put(ConfigUtil.normalizePropertyKey(name), value)); + environmentVariables.forEach((name, value) -> config.put(ConfigUtil.normalizeEnvironmentVariableKey(name), value)); + systemProperties + .forEach((key, value) -> config.put(ConfigUtil.normalizePropertyKey(key.toString()), value.toString())); + + this.config = config; + } + + private VeniceOpenTelemetryConfigProperties( + VeniceOpenTelemetryConfigProperties previousProperties, + Map overrides) { + // previousProperties are already normalized, they can be copied as they are + Map config = new HashMap<>(previousProperties.config); + overrides.forEach((name, value) -> config.put(ConfigUtil.normalizePropertyKey(name), value)); + + this.config = config; + } + + @Override + @Nullable + public String getString(String name) { + return config.get(ConfigUtil.normalizePropertyKey(name)); + } + + @Override + @Nullable + public Boolean getBoolean(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + return Boolean.parseBoolean(value); + } + + 
@Override + @Nullable + @SuppressWarnings("UnusedException") + public Integer getInt(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + try { + return Integer.parseInt(value); + } catch (NumberFormatException ex) { + throw newInvalidPropertyException(name, value, "integer"); + } + } + + @Override + @Nullable + @SuppressWarnings("UnusedException") + public Long getLong(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + try { + return Long.parseLong(value); + } catch (NumberFormatException ex) { + throw newInvalidPropertyException(name, value, "long"); + } + } + + @Override + @Nullable + @SuppressWarnings("UnusedException") + public Double getDouble(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + try { + return Double.parseDouble(value); + } catch (NumberFormatException ex) { + throw newInvalidPropertyException(name, value, "double"); + } + } + + @Override + @Nullable + @SuppressWarnings("UnusedException") + public Duration getDuration(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + String unitString = getUnitString(value); + // TODO: Environment variables have unknown encoding. `trim()` may cut codepoints oddly + // but likely we'll fail for malformed unit string either way. + String numberString = value.substring(0, value.length() - unitString.length()); + try { + long rawNumber = Long.parseLong(numberString.trim()); + TimeUnit unit = getDurationUnit(unitString.trim()); + return Duration.ofNanos(TimeUnit.NANOSECONDS.convert(rawNumber, unit)); + } catch (NumberFormatException ex) { + throw new ConfigurationException( + "Invalid duration property " + name + "=" + value + ". 
Expected number, found: " + numberString, + ex); + } catch (ConfigurationException ex) { + throw new ConfigurationException("Invalid duration property " + name + "=" + value + ". " + ex.getMessage()); + } + } + + @Override + public List getList(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null) { + return Collections.emptyList(); + } + // TODO this is a bit of a hack to support comma separated value for 1 key to be passed in header + // return filterBlanksAndNulls(value.split(",")); + return filterBlanksAndNulls(new String[] { value }); + } + + /** + * Returns {@link ConfigProperties#getList(String)} as a {@link Set} after validating there are no + * duplicate entries. + * + * @throws ConfigurationException if {@code name} contains duplicate entries + */ + public static Set getSet(ConfigProperties config, String name) { + List list = config.getList(ConfigUtil.normalizePropertyKey(name)); + Set set = new HashSet<>(list); + if (set.size() != list.size()) { + String duplicates = list.stream() + .collect(groupingBy(Function.identity(), Collectors.counting())) + .entrySet() + .stream() + .filter(entry -> entry.getValue() > 1) + .map(Map.Entry::getKey) + .collect(joining(",", "[", "]")); + throw new ConfigurationException(name + " contains duplicates: " + duplicates); + } + return set; + } + + @Override + public Map getMap(String name) { + return getList(ConfigUtil.normalizePropertyKey(name)).stream().map(entry -> { + String[] split = entry.split("=", 2); + if (split.length != 2 || StringUtils.isNullOrEmpty(split[0])) { + throw new ConfigurationException("Invalid map property: " + name + "=" + config.get(name)); + } + return filterBlanksAndNulls(split); + }) + // Filter entries with an empty value, i.e. 
"foo=" + .filter(splitKeyValuePairs -> splitKeyValuePairs.size() == 2) + .map( + splitKeyValuePairs -> new AbstractMap.SimpleImmutableEntry<>( + splitKeyValuePairs.get(0), + splitKeyValuePairs.get(1))) + // If duplicate keys, prioritize later ones similar to duplicate system properties on a + // Java command line. + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (first, next) -> next, LinkedHashMap::new)); + } + + /** + * Return a new {@link VeniceOpenTelemetryConfigProperties} by overriding the {@code previousProperties} with + * the {@code overrides}. + */ + public VeniceOpenTelemetryConfigProperties withOverrides(Map overrides) { + return new VeniceOpenTelemetryConfigProperties(this, overrides); + } + + private static ConfigurationException newInvalidPropertyException(String name, String value, String type) { + throw new ConfigurationException("Invalid value for property " + name + "=" + value + ". Must be a " + type + "."); + } + + private static List filterBlanksAndNulls(String[] values) { + return Arrays.stream(values).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); + } + + /** Returns the TimeUnit associated with a unit string. Defaults to milliseconds. */ + private static TimeUnit getDurationUnit(String unitString) { + switch (unitString) { + case "us": + return TimeUnit.MICROSECONDS; + case "ns": + return TimeUnit.NANOSECONDS; + case "": // Fallthrough expected + case "ms": + return TimeUnit.MILLISECONDS; + case "s": + return TimeUnit.SECONDS; + case "m": + return TimeUnit.MINUTES; + case "h": + return TimeUnit.HOURS; + case "d": + return TimeUnit.DAYS; + default: + throw new ConfigurationException("Invalid duration string, found: " + unitString); + } + } + + /** + * Fragments the 'units' portion of a config value from the 'value' portion. + * + *

E.g. "1ms" would return the string "ms". + */ + private static String getUnitString(String rawValue) { + int lastDigitIndex = rawValue.length() - 1; + while (lastDigitIndex >= 0) { + char c = rawValue.charAt(lastDigitIndex); + if (Character.isDigit(c)) { + break; + } + lastDigitIndex -= 1; + } + // Pull everything after the last digit. + return rawValue.substring(lastDigitIndex + 1); + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java new file mode 100644 index 0000000000..27bb283eba --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java @@ -0,0 +1,34 @@ +package com.linkedin.venice.stats; + +import com.linkedin.venice.utils.VeniceEnumValue; + + +public enum VeniceOpenTelemetryMetricFormat implements VeniceEnumValue { + /** + * Default format if not configured, names are defined as per this. + * should use snake case as per https://opentelemetry.io/docs/specs/semconv/general/attribute-naming/ + * For example: http.response.status_code + */ + SNAKE_CASE(0), + /** + * Alternate format for attribute names. If configured, defined names in snake_case will be + * transformed to either one of below formats. 
+ * + * camel case: For example, http.response.statusCode + * pascal case: For example, Http.Response.StatusCode + */ + CAMEL_CASE(1), PASCAL_CASE(2); + + private final int value; + + VeniceOpenTelemetryMetricFormat(int value) { + this.value = value; + } + + public static final int SIZE = values().length; + + @Override + public int getValue() { + return value; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java new file mode 100644 index 0000000000..8a8b26c1f3 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -0,0 +1,297 @@ +package com.linkedin.venice.stats; + +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.PASCAL_CASE; +import static io.opentelemetry.sdk.metrics.data.AggregationTemporality.DELTA; + +import com.linkedin.venice.exceptions.VeniceException; +import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.DoubleHistogramBuilder; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongCounterBuilder; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.exporter.otlp.internal.OtlpMetricExporterProvider; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.metrics.Aggregation; +import io.opentelemetry.sdk.metrics.InstrumentSelector; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.SdkMeterProvider; +import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder; +import io.opentelemetry.sdk.metrics.View; +import 
io.opentelemetry.sdk.metrics.data.AggregationTemporality; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.opentelemetry.sdk.metrics.export.PeriodicMetricReader; +import io.opentelemetry.sdk.resources.Resource; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + + +public class VeniceOpenTelemetryMetricsRepository { + private static final Logger LOGGER = LogManager.getLogger(VeniceOpenTelemetryMetricsRepository.class); + private OpenTelemetry openTelemetry = null; + private SdkMeterProvider sdkMeterProvider = null; + private boolean emitOpenTelemetryMetrics; + private VeniceOpenTelemetryMetricFormat metricFormat; + private Meter meter; + + private String metricPrefix; + + /** Below Maps are to create only one metric per name and type: Venice code will try to initialize the same metric multiple times as it will get + * called from per store path and per request type path. This will ensure that we only have one metric per name and + * use dimensions to differentiate between them. 
+ */ + private final VeniceConcurrentHashMap histogramMap = new VeniceConcurrentHashMap<>(); + private final VeniceConcurrentHashMap counterMap = new VeniceConcurrentHashMap<>(); + + MetricExporter getOtlpHttpMetricExporter(VeniceMetricsConfig metricsConfig) { + OtlpMetricExporterProvider otlpMetricExporterProvider = new OtlpMetricExporterProvider(); + VeniceOpenTelemetryConfigProperties config = + VeniceOpenTelemetryConfigProperties.createFromMap(metricsConfig.getOtelConfigs()); + return otlpMetricExporterProvider.createExporter(config); + } + + public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { + emitOpenTelemetryMetrics = metricsConfig.isEmitOpenTelemetryMetrics(); + metricFormat = metricsConfig.getMetricFormat(); + if (!emitOpenTelemetryMetrics) { + LOGGER.info("OpenTelemetry metrics are disabled"); + return; + } + LOGGER.info( + "OpenTelemetry initialization for {} started with config: {}", + metricsConfig.getServiceName(), + metricsConfig.toString()); + this.metricPrefix = transformMetricName("venice." + metricsConfig.getMetricPrefix()); + + try { + SdkMeterProviderBuilder builder = SdkMeterProvider.builder(); + if (metricsConfig.isEmitToHttpGrpcEndpoint()) { + MetricExporter httpExporter = getOtlpHttpMetricExporter(metricsConfig); + builder.registerMetricReader(PeriodicMetricReader.builder(httpExporter).build()); + } + if (metricsConfig.isEmitToLog()) { + // internal to test: Disabled by default + builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter()).build()); + } + if (metricsConfig.isUseExponentialHistogram()) { + /** + * {@link io.opentelemetry.exporter.internal.ExporterBuilderUtil#configureHistogramDefaultAggregation} + * doesn't take in buckets and scale configs. 
so using the below for now rather than passing these as + * configs to {@link #getOtlpHttpMetricExporter} + */ + builder.registerView( + InstrumentSelector.builder().setName("*").setType(InstrumentType.HISTOGRAM).build(), + View.builder() + .setAggregation( + Aggregation.base2ExponentialBucketHistogram( + metricsConfig.getExponentialHistogramMaxBuckets(), + metricsConfig.getExponentialHistogramMaxScale())) + .build()); + } + + builder.setResource(Resource.empty()); + sdkMeterProvider = builder.build(); + + // Register MeterProvider with OpenTelemetry instance + openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build(); + + this.meter = openTelemetry.getMeter(getMetricPrefix()); + LOGGER.info( + "OpenTelemetry initialization for {} completed with config: {}", + metricsConfig.getServiceName(), + metricsConfig.toString()); + } catch (Exception e) { + LOGGER.info( + "OpenTelemetry initialization for {} failed with config: {}", + metricsConfig.getServiceName(), + metricsConfig.toString(), + e); + throw new VeniceException("OpenTelemetry initialization for " + metricsConfig.getServiceName() + " failed", e); + } + } + + /** + * validate whether the metric name is a valid {@link VeniceOpenTelemetryMetricFormat#SNAKE_CASE} + */ + public static void validateMetricName(String name) { + if (name == null || name.isEmpty()) { + throw new IllegalArgumentException("Metric name cannot be null or empty. Input name: " + name); + } + if (name.contains(" ")) { + throw new IllegalArgumentException("Metric name cannot contain spaces. Input name: " + name); + } + // name should not contain any capital or special characters except for underscore and dot + if (!name.matches("^[a-z0-9_.]*$")) { + throw new IllegalArgumentException( + "Metric name can only contain lowercase alphabets, numbers, underscore and dot. Input name: " + name); + } + } + + String getFullMetricName(String metricPrefix, String name) { + String fullMetricName = metricPrefix + "." 
+ name; + validateMetricName(fullMetricName); + return transformMetricName(fullMetricName); + } + + private String getMetricPrefix() { + return metricPrefix; + } + + /** + * Input should already be in {@link VeniceOpenTelemetryMetricFormat#SNAKE_CASE} as validated + * in {@link #validateMetricName}. + * + * If configured a different format, return the transformed format + */ + private String transformMetricName(String input) { + switch (metricFormat) { + case SNAKE_CASE: + return input; // input should be already in snake_case + case PASCAL_CASE: + case CAMEL_CASE: + return transformMetricName(input, metricFormat); + default: + throw new IllegalArgumentException("Unsupported metric format: " + metricFormat); + } + } + + public static String transformMetricName(String input, VeniceOpenTelemetryMetricFormat metricFormat) { + String[] words = input.split("\\."); + for (int i = 0; i < words.length; i++) { + if (!words[i].isEmpty()) { + String[] partWords = words[i].split("_"); + for (int j = 0; j < partWords.length; j++) { + if (metricFormat == PASCAL_CASE || j > 0) { + // either pascal case or camel case except for the first word + partWords[j] = capitalizeFirstLetter(partWords[j]); + } + } + StringBuilder sb = new StringBuilder(); + for (String partWord: partWords) { + sb.append(partWord); + } + words[i] = sb.toString(); + } + } + StringBuilder finalName = new StringBuilder(); + for (String word: words) { + finalName.append(word); + finalName.append("."); + } + // remove the last dot + if (finalName.length() > 0) { + finalName.deleteCharAt(finalName.length() - 1); + } + return finalName.toString(); + } + + private static String capitalizeFirstLetter(String word) { + if (word.isEmpty()) { + return word; + } + return Character.toUpperCase(word.charAt(0)) + word.substring(1); + } + + public DoubleHistogram getHistogram(String name, String unit, String description) { + if (emitOpenTelemetryMetrics) { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + 
if (openTelemetry != null) { + return histogramMap.computeIfAbsent(name, key -> { + DoubleHistogramBuilder builder = + meter.histogramBuilder(fullMetricName).setUnit(unit).setDescription(description); + return builder.build(); + }); + } else { + LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); + return null; + } + } else { + return null; + } + } + + public DoubleHistogram getHistogramWithoutBuckets(String name, String unit, String description) { + if (emitOpenTelemetryMetrics) { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + if (openTelemetry != null) { + return histogramMap.computeIfAbsent(name, key -> { + DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) + .setExplicitBucketBoundariesAdvice(new ArrayList<>()) + .setUnit(unit) + .setDescription(description); + return builder.build(); + }); + } else { + LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); + return null; + } + } else { + return null; + } + } + + public LongCounter getCounter(String name, String unit, String description) { + if (emitOpenTelemetryMetrics) { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + if (openTelemetry != null) { + return counterMap.computeIfAbsent(name, key -> { + LongCounterBuilder builder = meter.counterBuilder(fullMetricName).setUnit(unit).setDescription(description); + return builder.build(); + }); + } else { + LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); + return null; + } + } else { + return null; + } + } + + public void close() { + LOGGER.info("OpenTelemetry close"); + sdkMeterProvider.shutdown(); + sdkMeterProvider = null; + } + + static class LogBasedMetricExporter implements MetricExporter { + @Override + public AggregationTemporality getAggregationTemporality(InstrumentType instrumentType) { + return DELTA; + } + + 
@Override + public CompletableResultCode export(Collection metrics) { + LOGGER.info("Logging OpenTelemetry metrics for debug purpose: {}", Arrays.toString(metrics.toArray())); + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } + } + + // for testing purpose + public SdkMeterProvider getSdkMeterProvider() { + return sdkMeterProvider; + } + + public OpenTelemetry getOpenTelemetry() { + return openTelemetry; + } + + public Meter getMeter() { + return meter; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java index e9855ece93..06bdb5d03c 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java @@ -458,4 +458,8 @@ public Properties toProperties() { public boolean isEmpty() { return this.props.isEmpty(); } + + public Map getPropsMap() { + return props; + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java index b95502ab31..022e547ff7 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java @@ -1,5 +1,7 @@ package com.linkedin.venice.utils.metrics; +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.MetricConfig; import 
io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; @@ -20,6 +22,10 @@ public static MetricsRepository createSingleThreadedMetricsRepository() { return createSingleThreadedMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } + public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() { + return createSingleThreadedVeniceMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); + } + public static MetricsRepository createSingleThreadedMetricsRepository( long maxMetricsMeasurementTimeoutMs, long initialMetricsMeasurementTimeoutMs) { @@ -31,4 +37,17 @@ public static MetricsRepository createSingleThreadedMetricsRepository( .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) .build())); } + + public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository( + long maxMetricsMeasurementTimeoutMs, + long initialMetricsMeasurementTimeoutMs) { + MetricConfig tehutiMetricsConfig = new MetricConfig( + new AsyncGauge.AsyncGaugeExecutor.Builder().setMetricMeasurementThreadCount(1) + .setSlowMetricMeasurementThreadCount(1) + .setInitialMetricsMeasurementTimeoutInMs(initialMetricsMeasurementTimeoutMs) + .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) + .build()); + return new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setTehutiMetricConfig(tehutiMetricsConfig).build()); + } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java new file mode 100644 index 0000000000..1c1bd06b2b --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -0,0 +1,107 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static 
org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +import com.linkedin.venice.exceptions.VeniceException; +import com.linkedin.venice.stats.VeniceMetricsConfig.VeniceMetricsConfigBuilder; +import io.tehuti.metrics.MetricConfig; +import java.util.HashMap; +import java.util.Map; +import org.testng.annotations.Test; + + +public class VeniceMetricsConfigTest { + @Test + public void testDefaultValues() { + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().build(); + assertEquals(config.getServiceName(), "noop_service"); + assertEquals(config.getMetricPrefix(), "service"); + assertFalse(config.isEmitOpenTelemetryMetrics()); + assertFalse(config.isEmitToHttpGrpcEndpoint()); + assertFalse(config.isEmitToLog()); + assertTrue(config.isUseExponentialHistogram()); + assertEquals(config.getExponentialHistogramMaxScale(), 3); + assertEquals(config.getExponentialHistogramMaxBuckets(), 250); + } + + @Test + public void testCustomValues() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.log", "true"); + + MetricConfig metricConfig = new MetricConfig(); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .setTehutiMetricConfig(metricConfig) + .extractAndSetOtelConfigs(otelConfigs) + .build(); + + assertEquals(config.getServiceName(), "TestService"); + assertEquals(config.getMetricPrefix(), "TestPrefix"); + assertTrue(config.isEmitOpenTelemetryMetrics()); + assertTrue(config.getOtelConfigs().containsKey("otel.venice.enabled")); + assertTrue(config.isEmitToLog()); + assertEquals(config.getTehutiMetricConfig(), metricConfig); + } + + @Test + public void testOtelMissingConfigs() { + Map invalidOtelConfigs = new HashMap<>(); + invalidOtelConfigs.put("otel.venice.enabled", "true"); + invalidOtelConfigs.put("otel.venice.export.to.http.grpc.endpoint", "true"); + + VeniceMetricsConfigBuilder 
builder = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(invalidOtelConfigs); + + // should throw exception because required configs are missing + assertThrows(VeniceException.class, builder::build); + } + + @Test + public void testGetMetricsPrefix() { + assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("venice-router"), "router"); + assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("service_name"), "name"); + assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("com.linkedin.service"), "service"); + } + + @Test + public void testOtelConfigWithInvalidMetricFormat() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.metrics.format", "INVALID_FORMAT"); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + + assertEquals( + config.getMetricFormat(), + VeniceOpenTelemetryMetricFormat.SNAKE_CASE, + "Invalid metric format should fall back to default."); + } + + @Test + public void testOtelConfigWithValidMetricFormat() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.metrics.format", "CAMEL_CASE"); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + + assertEquals(config.getMetricFormat(), VeniceOpenTelemetryMetricFormat.CAMEL_CASE); + } + + @Test + public void testEnableHttpGrpcEndpointConfigWithRequiredFields() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.http.grpc.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", "http/protobuf"); + otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + + assertTrue(config.isEmitToHttpGrpcEndpoint()); + } +} diff --git 
a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java new file mode 100644 index 0000000000..d2de313da6 --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java @@ -0,0 +1,130 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; + +import io.tehuti.Metric; +import io.tehuti.metrics.Measurable; +import io.tehuti.metrics.MetricsReporter; +import io.tehuti.metrics.MetricsRepository; +import io.tehuti.metrics.Sensor; +import io.tehuti.metrics.stats.AsyncGauge; +import java.util.Map; +import org.mockito.Mockito; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +public class VeniceMetricsRepositoryTest { + private VeniceMetricsRepository metricsRepository; + private VeniceOpenTelemetryMetricsRepository mockOpenTelemetryMetricsRepository; + private MetricsRepository mockDelegate; + + @BeforeMethod + public void setUp() { + VeniceMetricsConfig config = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + mockOpenTelemetryMetricsRepository = Mockito.mock(VeniceOpenTelemetryMetricsRepository.class); + mockDelegate = Mockito.mock(MetricsRepository.class); + metricsRepository = new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); + } + + @AfterMethod + public void tearDown() { + metricsRepository.close(); + } + + @Test + public void testConstructorWithDelegateAndConfig() { + VeniceMetricsConfig config = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + VeniceMetricsRepository repo = + new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); + + assertEquals(repo.getVeniceMetricsConfig(), config); + assertEquals(repo.getOpenTelemetryMetricsRepository(), 
mockOpenTelemetryMetricsRepository); + } + + @Test + public void testCloseWithDelegate() { + metricsRepository.close(); + Mockito.verify(mockDelegate, Mockito.times(1)).close(); + Mockito.verify(mockOpenTelemetryMetricsRepository, Mockito.times(1)).close(); + } + + @Test + public void testAddMetricDelegation() { + Measurable measurable = Mockito.mock(Measurable.class); + Metric metric = Mockito.mock(Metric.class); + + Mockito.when(mockDelegate.addMetric("testMetric", measurable)).thenReturn(metric); + + Metric returnedMetric = metricsRepository.addMetric("testMetric", measurable); + assertEquals(returnedMetric, metric); + + Mockito.verify(mockDelegate, Mockito.times(1)).addMetric("testMetric", measurable); + } + + @Test + public void testGetSensorDelegation() { + Sensor sensor = Mockito.mock(Sensor.class); + Mockito.when(mockDelegate.getSensor("testSensor")).thenReturn(sensor); + + Sensor returnedSensor = metricsRepository.getSensor("testSensor"); + assertEquals(returnedSensor, sensor); + + Mockito.verify(mockDelegate, Mockito.times(1)).getSensor("testSensor"); + } + + @Test + public void testMetricsRetrieval() { + Map mockMetrics = Mockito.mock(Map.class); + Mockito.doReturn(mockMetrics).when(mockDelegate).metrics(); + + Map retrievedMetrics = metricsRepository.metrics(); + assertEquals(retrievedMetrics, mockMetrics); + + Mockito.verify(mockDelegate, Mockito.times(1)).metrics(); + } + + @Test + public void testGetMetricDelegation() { + Metric metric = Mockito.mock(Metric.class); + Mockito.when(mockDelegate.getMetric("testMetric")).thenReturn(metric); + + Metric retrievedMetric = metricsRepository.getMetric("testMetric"); + assertEquals(retrievedMetric, metric); + + Mockito.verify(mockDelegate, Mockito.times(1)).getMetric("testMetric"); + } + + @Test + public void testAddReporterDelegation() { + MetricsReporter mockReporter = Mockito.mock(MetricsReporter.class); + + metricsRepository.addReporter(mockReporter); + Mockito.verify(mockDelegate, 
Mockito.times(1)).addReporter(mockReporter); + } + + @Test + public void testAsyncGaugeExecutorDelegation() { + AsyncGauge.AsyncGaugeExecutor asyncGaugeExecutor = Mockito.mock(AsyncGauge.AsyncGaugeExecutor.class); + Mockito.when(mockDelegate.getAsyncGaugeExecutor()).thenReturn(asyncGaugeExecutor); + + AsyncGauge.AsyncGaugeExecutor executor = metricsRepository.getAsyncGaugeExecutor(); + assertEquals(executor, asyncGaugeExecutor); + + Mockito.verify(mockDelegate, Mockito.times(1)).getAsyncGaugeExecutor(); + } + + @Test + public void testSensorCreationAndDeletionWithDelegate() { + Sensor mockSensor = Mockito.mock(Sensor.class); + Mockito.when(mockDelegate.sensor("testSensor")).thenReturn(mockSensor); + + Sensor sensor = metricsRepository.sensor("testSensor"); + assertEquals(sensor, mockSensor); + + metricsRepository.removeSensor("testSensor"); + Mockito.verify(mockDelegate, Mockito.times(1)).sensor("testSensor"); + Mockito.verify(mockDelegate, Mockito.times(1)).removeSensor("testSensor"); + } +} diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java new file mode 100644 index 0000000000..85b9d109cc --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -0,0 +1,141 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.fail; + +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import java.util.HashMap; +import org.mockito.Mock; +import org.mockito.Mockito; +import 
org.mockito.MockitoAnnotations; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +public class VeniceOpenTelemetryMetricsRepositoryTest { + private VeniceOpenTelemetryMetricsRepository metricsRepository; + + @Mock + private VeniceMetricsConfig mockMetricsConfig; + + @BeforeMethod + public void setUp() { + MockitoAnnotations.initMocks(this); + + Mockito.when(mockMetricsConfig.isEmitOpenTelemetryMetrics()).thenReturn(true); + Mockito.when(mockMetricsConfig.getMetricFormat()).thenReturn(VeniceOpenTelemetryMetricFormat.SNAKE_CASE); + Mockito.when(mockMetricsConfig.getMetricPrefix()).thenReturn("test_prefix"); + Mockito.when(mockMetricsConfig.getServiceName()).thenReturn("test_service"); + Mockito.when(mockMetricsConfig.isEmitToHttpGrpcEndpoint()).thenReturn(true); + Mockito.when(mockMetricsConfig.isUseExponentialHistogram()).thenReturn(false); + + metricsRepository = new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig); + } + + @AfterMethod + public void tearDown() { + metricsRepository.close(); + } + + @Test + public void testConstructorInitialize() { + // Check if OpenTelemetry and SdkMeterProvider are initialized correctly + assertNotNull(metricsRepository.getOpenTelemetry()); + assertNotNull(metricsRepository.getSdkMeterProvider()); + assertNotNull(metricsRepository.getMeter()); + } + + @Test + public void testConstructorWithEmitDisabled() { + Mockito.when(mockMetricsConfig.isEmitOpenTelemetryMetrics()).thenReturn(false); + VeniceOpenTelemetryMetricsRepository metricsRepository = + new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig); + + // Verify that metrics-related fields are null when metrics are disabled + assertNull(metricsRepository.getOpenTelemetry()); + assertNull(metricsRepository.getSdkMeterProvider()); + assertNull(metricsRepository.getMeter()); + assertNull(metricsRepository.getHistogram("test", "unit", "desc")); + 
assertNull(metricsRepository.getCounter("test", "unit", "desc")); + } + + @Test + public void testGetOtlpHttpMetricExporterWithValidConfig() { + HashMap otelConfigs = new HashMap<>(); + otelConfigs.put("otel.exporter.otlp.endpoint", "http://localhost:4318"); + Mockito.when(mockMetricsConfig.getOtelConfigs()).thenReturn(otelConfigs); + + MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); + + // Verify that the exporter is not null and is of the expected type + assertNotNull(exporter); + + // Check that the exporter uses the correct endpoint + assertEquals(otelConfigs.get("otel.exporter.otlp.endpoint"), "http://localhost:4318"); + } + + @Test + public void testGetOtlpHttpMetricExporterWithEmptyConfig() { + Mockito.when(mockMetricsConfig.getOtelConfigs()).thenReturn(new HashMap<>()); + + try { + MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); + assertNotNull(exporter, "Exporter should be created even with an empty config."); + } catch (Exception e) { + fail("Exporter creation should not throw an exception with empty config."); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testValidateMetricNameWithNullName() { + VeniceOpenTelemetryMetricsRepository.validateMetricName(null); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testValidateMetricNameWithEmptyName() { + VeniceOpenTelemetryMetricsRepository.validateMetricName(""); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testValidateMetricNameWithInvalidName() { + VeniceOpenTelemetryMetricsRepository.validateMetricName("Invalid Name!"); + } + + @Test + public void testTransformMetricName() { + Mockito.when(mockMetricsConfig.getMetricFormat()).thenReturn(VeniceOpenTelemetryMetricFormat.SNAKE_CASE); + assertEquals(metricsRepository.getFullMetricName("prefix", "metric_name"), "prefix.metric_name"); + + String transformedName = 
VeniceOpenTelemetryMetricsRepository + .transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricFormat.PASCAL_CASE); + assertEquals(transformedName, "Test.TestMetricName"); + + transformedName = VeniceOpenTelemetryMetricsRepository + .transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricFormat.CAMEL_CASE); + assertEquals(transformedName, "test.testMetricName"); + } + + @Test + public void testCreateTwoHistograms() { + DoubleHistogram histogram1 = metricsRepository.getHistogram("test_histogram", "unit", "description"); + DoubleHistogram histogram2 = metricsRepository.getHistogram("test_histogram", "unit", "description"); + + assertNotNull(histogram1); + assertSame(histogram1, histogram2, "Should return the same instance for the same histogram name."); + } + + @Test + public void testCreateTwoCounters() { + LongCounter counter1 = metricsRepository.getCounter("test_counter", "unit", "description"); + LongCounter counter2 = metricsRepository.getCounter("test_counter", "unit", "description"); + + assertNotNull(counter1); + assertSame(counter1, counter2, "Should return the same instance for the same counter name."); + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java index 07483b6099..85b335f2ce 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java @@ -18,7 +18,7 @@ public class AbstractVeniceAggStoreStats extends public AbstractVeniceAggStoreStats( String clusterName, MetricsRepository metricsRepository, - StatsSupplier statsSupplier, + StatsSupplierMetricsRepository statsSupplier, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { super(clusterName, metricsRepository, 
/**
 * Coarse classification of HTTP status codes ("1xx", "2xx", ...), adapted from
 * {@link io.netty.handler.codec.http.HttpStatusClass} so that each class exposes a category label.
 *
 * <p>Each regular category covers the half-open range {@code [min, max)}. {@link #UNKNOWN} covers
 * every code below 100 or at/above 600.
 */
public enum VeniceHttpResponseStatusCodeCategory {
  /** The informational class (1xx) */
  INFORMATIONAL(100, 200, "1xx"),
  /** The success class (2xx) */
  SUCCESS(200, 300, "2xx"),
  /** The redirection class (3xx) */
  REDIRECTION(300, 400, "3xx"),
  /** The client error class (4xx) */
  CLIENT_ERROR(400, 500, "4xx"),
  /** The server error class (5xx) */
  SERVER_ERROR(500, 600, "5xx"),
  /** The unknown class: anything outside 100..599 */
  UNKNOWN(0, 0, "Unknown") {
    @Override
    public boolean contains(int code) {
      return !(code >= 100 && code < 600);
    }
  };

  private final int min;
  private final int max;
  private final String category;

  VeniceHttpResponseStatusCodeCategory(int min, int max, String category) {
    this.min = min;
    this.max = max;
    this.category = category;
  }

  /**
   * Returns the class of the specified HTTP status code.
   */
  public static VeniceHttpResponseStatusCodeCategory valueOf(int code) {
    // The hundreds digit selects the class; integer division sends every code outside
    // 100..599 (including negatives) to the default branch.
    switch (code / 100) {
      case 1:
        return INFORMATIONAL;
      case 2:
        return SUCCESS;
      case 3:
        return REDIRECTION;
      case 4:
        return CLIENT_ERROR;
      case 5:
        return SERVER_ERROR;
      default:
        return UNKNOWN;
    }
  }

  /**
   * Returns the class of the specified HTTP status code given as text.
   *
   * <p>Only the first digit decides the class; anything that is not exactly three ASCII digits
   * yields {@link #UNKNOWN}. Note that an argument whose static type is {@link String} resolves to
   * the compiler-generated {@code valueOf(String)} (lookup by constant name) rather than to this
   * overload.
   *
   * @param code Just the numeric portion of the http status code.
   */
  public static VeniceHttpResponseStatusCodeCategory valueOf(CharSequence code) {
    if (code == null || code.length() != 3) {
      return UNKNOWN;
    }
    char leading = code.charAt(0);
    boolean allDigits = isAsciiDigit(leading) && isAsciiDigit(code.charAt(1)) && isAsciiDigit(code.charAt(2));
    if (!allDigits) {
      return UNKNOWN;
    }
    return valueOf((leading - '0') * 100);
  }

  private static boolean isAsciiDigit(char c) {
    return '0' <= c && c <= '9';
  }

  /**
   * Returns {@code true} if and only if the specified HTTP status code falls into this class.
   */
  public boolean contains(int code) {
    return min <= code && code < max;
  }

  /**
   * Returns the category of this HTTP status class.
   */
  public String getCategory() {
    return category;
  }

  // used for tests
  public int getMin() {
    return min;
  }

  public int getMax() {
    return max;
  }
}
/**
 * Reasons for which a request retry was aborted, each carrying the string reported through
 * {@link #getAbortReason()}.
 */
public enum VeniceRequestRetryAbortReason {
  RETRY_ABORTED_BY_SLOW_ROUTE("slow_route"),
  RETRY_ABORTED_BY_DELAY_CONSTRAINT("delay_constraint"),
  // NOTE(review): the value says "router" while the constant says "ROUTE" - confirm which is intended.
  RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT("max_retry_router_limit"),
  RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA("no_available_replica");

  private final String reason;

  VeniceRequestRetryAbortReason(String reason) {
    this.reason = reason;
  }

  /** Returns the string value associated with this abort reason. */
  public String getAbortReason() {
    return reason;
  }
}
/**
 * Response status categories, each carrying the string reported through {@link #getCategory()}.
 */
public enum VeniceResponseStatusCategory {
  HEALTHY("healthy"),
  UNHEALTHY("unhealthy"),
  TARDY("tardy"),
  THROTTLED("throttled"),
  BAD_REQUEST("bad_request");

  private final String label;

  VeniceResponseStatusCategory(String label) {
    this.label = label;
  }

  /** Returns the string value associated with this category. */
  public String getCategory() {
    return label;
  }
}
500); + break; + case SERVER_ERROR: + assertEquals(category.getCategory(), "5xx"); + assertEquals(category.getMin(), 500); + assertEquals(category.getMax(), 600); + break; + case UNKNOWN: + assertEquals(category.getCategory(), "Unknown"); + assertEquals(category.getMin(), 0); + assertEquals(category.getMax(), 0); + break; + default: + throw new IllegalArgumentException("Unknown category: " + category); + } + } + } + + @Test + public void testUnknownCategory() { + assertEquals(VeniceHttpResponseStatusCodeCategory.valueOf(99), VeniceHttpResponseStatusCodeCategory.UNKNOWN); + assertNotEquals(VeniceHttpResponseStatusCodeCategory.valueOf(100), VeniceHttpResponseStatusCodeCategory.UNKNOWN); + assertEquals(VeniceHttpResponseStatusCodeCategory.valueOf(600), VeniceHttpResponseStatusCodeCategory.UNKNOWN); + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java new file mode 100644 index 0000000000..1e459f40ed --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java @@ -0,0 +1,123 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; +import org.testng.annotations.Test; + + +public class VeniceMetricsDimensionsTest { + @Test + public void testGetDimensionNameInSnakeCase() { + VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.SNAKE_CASE; + for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { + switch (dimension) { + case VENICE_STORE_NAME: + assertEquals(dimension.getDimensionName(format), "venice.store.name"); + break; + case VENICE_CLUSTER_NAME: + assertEquals(dimension.getDimensionName(format), "venice.cluster.name"); + break; + case VENICE_REQUEST_METHOD: + 
assertEquals(dimension.getDimensionName(format), "venice.request.method"); + break; + case HTTP_RESPONSE_STATUS_CODE: + assertEquals(dimension.getDimensionName(format), "http.response.status_code"); + break; + case HTTP_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "http.response.status_code_category"); + break; + case VENICE_REQUEST_VALIDATION_OUTCOME: + assertEquals(dimension.getDimensionName(format), "venice.request.validation_outcome"); + break; + case VENICE_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "venice.response.status_code_category"); + break; + case VENICE_REQUEST_RETRY_TYPE: + assertEquals(dimension.getDimensionName(format), "venice.request.retry_type"); + break; + case VENICE_REQUEST_RETRY_ABORT_REASON: + assertEquals(dimension.getDimensionName(format), "venice.request.retry_abort_reason"); + break; + default: + throw new IllegalArgumentException("Unknown dimension: " + dimension); + } + } + } + + @Test + public void testGetDimensionNameInCamelCase() { + VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.CAMEL_CASE; + for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { + switch (dimension) { + case VENICE_STORE_NAME: + assertEquals(dimension.getDimensionName(format), "venice.store.name"); + break; + case VENICE_CLUSTER_NAME: + assertEquals(dimension.getDimensionName(format), "venice.cluster.name"); + break; + case VENICE_REQUEST_METHOD: + assertEquals(dimension.getDimensionName(format), "venice.request.method"); + break; + case HTTP_RESPONSE_STATUS_CODE: + assertEquals(dimension.getDimensionName(format), "http.response.statusCode"); + break; + case HTTP_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "http.response.statusCodeCategory"); + break; + case VENICE_REQUEST_VALIDATION_OUTCOME: + assertEquals(dimension.getDimensionName(format), "venice.request.validationOutcome"); + break; + case 
VENICE_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "venice.response.statusCodeCategory"); + break; + case VENICE_REQUEST_RETRY_TYPE: + assertEquals(dimension.getDimensionName(format), "venice.request.retryType"); + break; + case VENICE_REQUEST_RETRY_ABORT_REASON: + assertEquals(dimension.getDimensionName(format), "venice.request.retryAbortReason"); + break; + default: + throw new IllegalArgumentException("Unknown dimension: " + dimension); + } + } + } + + @Test + public void testGetDimensionNameInPascalCase() { + VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.PASCAL_CASE; + for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { + switch (dimension) { + case VENICE_STORE_NAME: + assertEquals(dimension.getDimensionName(format), "Venice.Store.Name"); + break; + case VENICE_CLUSTER_NAME: + assertEquals(dimension.getDimensionName(format), "Venice.Cluster.Name"); + break; + case VENICE_REQUEST_METHOD: + assertEquals(dimension.getDimensionName(format), "Venice.Request.Method"); + break; + case HTTP_RESPONSE_STATUS_CODE: + assertEquals(dimension.getDimensionName(format), "Http.Response.StatusCode"); + break; + case HTTP_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "Http.Response.StatusCodeCategory"); + break; + case VENICE_REQUEST_VALIDATION_OUTCOME: + assertEquals(dimension.getDimensionName(format), "Venice.Request.ValidationOutcome"); + break; + case VENICE_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "Venice.Response.StatusCodeCategory"); + break; + case VENICE_REQUEST_RETRY_TYPE: + assertEquals(dimension.getDimensionName(format), "Venice.Request.RetryType"); + break; + case VENICE_REQUEST_RETRY_ABORT_REASON: + assertEquals(dimension.getDimensionName(format), "Venice.Request.RetryAbortReason"); + break; + default: + throw new IllegalArgumentException("Unknown dimension: " + dimension); + } + } + } +} diff --git 
a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java new file mode 100644 index 0000000000..6476b3e1c2 --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java @@ -0,0 +1,30 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceRequestRetryAbortReasonTest { + @Test + public void testRetryRequestAbortReason() { + for (VeniceRequestRetryAbortReason reason: VeniceRequestRetryAbortReason.values()) { + switch (reason) { + case RETRY_ABORTED_BY_SLOW_ROUTE: + assertEquals(reason.getAbortReason(), "slow_route"); + break; + case RETRY_ABORTED_BY_DELAY_CONSTRAINT: + assertEquals(reason.getAbortReason(), "delay_constraint"); + break; + case RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT: + assertEquals(reason.getAbortReason(), "max_retry_router_limit"); + break; + case RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA: + assertEquals(reason.getAbortReason(), "no_available_replica"); + break; + default: + throw new IllegalArgumentException("Unknown reason: " + reason); + } + } + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java new file mode 100644 index 0000000000..52c8bfb94c --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java @@ -0,0 +1,24 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceRequestRetryTypeTest { + @Test + public void testVeniceRequestRetryType() { + for 
(VeniceRequestRetryType retryType: VeniceRequestRetryType.values()) { + switch (retryType) { + case ERROR_RETRY: + assertEquals(retryType.getRetryType(), "error_retry"); + break; + case LONG_TAIL_RETRY: + assertEquals(retryType.getRetryType(), "long_tail_retry"); + break; + default: + throw new IllegalArgumentException("Unknown retry type: " + retryType); + } + } + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java new file mode 100644 index 0000000000..28f804eab5 --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java @@ -0,0 +1,24 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceRequestValidationOutcomeTest { + @Test + public void testVeniceRequestValidationOutcome() { + for (VeniceRequestValidationOutcome outcome: VeniceRequestValidationOutcome.values()) { + switch (outcome) { + case VALID: + assertEquals(outcome.getOutcome(), "valid"); + break; + case INVALID_KEY_COUNT_LIMIT_EXCEEDED: + assertEquals(outcome.getOutcome(), "invalid_key_count_limit_exceeded"); + break; + default: + throw new IllegalArgumentException("Unknown outcome: " + outcome); + } + } + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java new file mode 100644 index 0000000000..22272d3576 --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java @@ -0,0 +1,33 @@ +package com.linkedin.venice.stats.dimensions; + +import static 
org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceResponseStatusCategoryTest { + @Test + public void testVeniceResponseStatusCategory() { + for (VeniceResponseStatusCategory responseStatusCategory: VeniceResponseStatusCategory.values()) { + switch (responseStatusCategory) { + case HEALTHY: + assertEquals(responseStatusCategory.getCategory(), "healthy"); + break; + case UNHEALTHY: + assertEquals(responseStatusCategory.getCategory(), "unhealthy"); + break; + case TARDY: + assertEquals(responseStatusCategory.getCategory(), "tardy"); + break; + case THROTTLED: + assertEquals(responseStatusCategory.getCategory(), "throttled"); + break; + case BAD_REQUEST: + assertEquals(responseStatusCategory.getCategory(), "bad_request"); + break; + default: + throw new IllegalArgumentException("Unknown response status category: " + responseStatusCategory); + } + } + } +} diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index 23260f6058..b5092d71fa 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -60,6 +60,7 @@ public class VeniceRouterWrapper extends ProcessWrapper implements MetricsAware public static final String CLUSTER_DISCOVERY_D2_SERVICE_NAME = ClientConfig.DEFAULT_CLUSTER_DISCOVERY_D2_SERVICE_NAME + "_test"; private static final String ROUTER_SERVICE_NAME = "venice-router"; + private static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; private final VeniceProperties properties; private final String zkAddress; private RouterServer service; @@ -152,6 +153,12 @@ static StatefulServiceProvider generateService( .put(MAX_READ_CAPACITY, 
DEFAULT_PER_ROUTER_READ_QUOTA) .put(SYSTEM_SCHEMA_CLUSTER_NAME, clusterName) .put(ROUTER_STORAGE_NODE_CLIENT_TYPE, StorageNodeClientType.APACHE_HTTP_ASYNC_CLIENT.name()) + .put("otel.venice.enabled", Boolean.TRUE.toString()) + .put("otel.venice.export.to.log", Boolean.TRUE.toString()) + .put("otel.venice.export.to.http.grpc.endpoint", Boolean.TRUE.toString()) + .put("otel.exporter.otlp.metrics.protocol", "http/protobuf") + .put("otel.exporter.otlp.metrics.endpoint", "http://localhost:4318/v1/metrics") + .put("otel.exporter.otlp.metrics.temporality.preference", "delta") .put(properties); // setup d2 config first @@ -175,7 +182,10 @@ static StatefulServiceProvider generateService( d2Servers, Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), - TehutiUtils.getMetricsRepository(ROUTER_SERVICE_NAME), + TehutiUtils.getVeniceMetricsRepository( + ROUTER_SERVICE_NAME, + ROUTER_SERVICE_METRIC_PREFIX, + routerProperties.getPropsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); return new VeniceRouterWrapper( @@ -237,7 +247,8 @@ protected void newProcess() { d2Servers, Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), - TehutiUtils.getMetricsRepository(ROUTER_SERVICE_NAME), + TehutiUtils + .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getPropsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); LOGGER.info("Started VeniceRouterWrapper: {}", this); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java index a3a73dc59f..1349a56226 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java +++ 
b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java @@ -37,6 +37,7 @@ import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.TestUtils; import io.netty.handler.codec.http.DefaultFullHttpResponse; import io.netty.handler.codec.http.DefaultHttpHeaders; @@ -46,7 +47,6 @@ import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -322,7 +322,7 @@ private VeniceDispatcher getMockDispatcher(boolean forcePendingCheck, boolean fo doReturn(TimeUnit.MINUTES.toMillis(1)).when(routerConfig).getLeakedFutureCleanupThresholdMs(); doReturn(24).when(routerConfig).getIoThreadCountInPoolMode(); ReadOnlyStoreRepository mockStoreRepo = mock(ReadOnlyStoreRepository.class); - MetricsRepository mockMetricsRepo = new MetricsRepository(); + VeniceMetricsRepository mockMetricsRepo = new VeniceMetricsRepository(); RouterStats mockRouterStats = mock(RouterStats.class); RouteHttpRequestStats routeHttpRequestStats = mock(RouteHttpRequestStats.class); when(mockRouterStats.getStatsByType(any())).thenReturn(mock(AggRouterHttpRequestStats.class)); diff --git a/services/venice-router/build.gradle b/services/venice-router/build.gradle index 1eda645461..e1bf3cef4e 100644 --- a/services/venice-router/build.gradle +++ b/services/venice-router/build.gradle @@ -64,6 +64,10 @@ dependencies { implementation libraries.httpAsyncClient implementation project(':internal:alpini:router:alpini-router-api') implementation project(':internal:alpini:router:alpini-router-impl') + implementation libraries.opentelemetryApi + 
implementation libraries.opentelemetrySdk + implementation libraries.opentelemetryExporterLogging + implementation libraries.opentelemetryExporterOtlp testImplementation project(':clients:venice-thin-client') testImplementation libraries.kafkaClientsTest // TODO: Get rid of Kafka dependency in venice-common (used by TopicCreator) diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index c70f2bedd7..eea6ce6d35 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -82,6 +82,8 @@ import com.linkedin.venice.stats.TehutiUtils; import com.linkedin.venice.stats.ThreadPoolStats; import com.linkedin.venice.stats.VeniceJVMStats; +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.stats.ZkClientStatusStats; import com.linkedin.venice.throttle.EventThrottler; import com.linkedin.venice.utils.DaemonThreadFactory; @@ -135,7 +137,7 @@ public class RouterServer extends AbstractVeniceService { private static final String ROUTER_RETRY_MANAGER_THREAD_PREFIX = "Router-retry-manager-thread"; // Immutable state private final List serviceDiscoveryAnnouncers; - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final Optional sslFactory; private final Optional accessController; @@ -194,6 +196,7 @@ public class RouterServer extends AbstractVeniceService { private final Map optionalChannelHandlers = new LinkedHashMap<>(); private static final String ROUTER_SERVICE_NAME = "venice-router"; + private static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; /** * Thread number used to monitor the listening port; @@ -272,7 +275,8 @@ public RouterServer( serviceDiscoveryAnnouncers, 
accessController, sslFactory, - TehutiUtils.getMetricsRepository(ROUTER_SERVICE_NAME), + TehutiUtils + .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getPropsMap()), null, "venice-discovery"); } @@ -287,7 +291,7 @@ public RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, Optional sslFactory, - MetricsRepository metricsRepository) { + VeniceMetricsRepository metricsRepository) { this( properties, serviceDiscoveryAnnouncers, @@ -306,6 +310,28 @@ public RouterServer( MetricsRepository metricsRepository, D2Client d2Client, String d2ServiceName) { + this( + properties, + serviceDiscoveryAnnouncers, + accessController, + sslFactory, + new VeniceMetricsRepository( + metricsRepository, + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName("venice-router") + .extractAndSetOtelConfigs(properties.getPropsMap()) + .build()), + d2Client, + d2ServiceName); + } + + public RouterServer( + VeniceProperties properties, + List serviceDiscoveryAnnouncers, + Optional accessController, + Optional sslFactory, + VeniceMetricsRepository metricsRepository, + D2Client d2Client, + String d2ServiceName) { this(properties, serviceDiscoveryAnnouncers, accessController, sslFactory, metricsRepository, true); HelixReadOnlyZKSharedSystemStoreRepository readOnlyZKSharedSystemStoreRepository = @@ -323,6 +349,7 @@ public RouterServer( this.routerStats = new RouterStats<>( requestType -> new AggRouterHttpRequestStats( metricsRepository, + config.getClusterName(), requestType, config.isKeyValueProfilingEnabled(), metadataRepository, @@ -366,7 +393,7 @@ private RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, Optional sslFactory, - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, boolean isCreateHelixManager) { config = new VeniceRouterConfig(properties); zkClient = @@ -381,7 +408,7 @@ private RouterServer( this.metaStoreShadowReader = Optional.empty(); 
this.metricsRepository = metricsRepository; - this.aggHostHealthStats = new AggHostHealthStats(metricsRepository); + this.aggHostHealthStats = new AggHostHealthStats(metricsRepository, config.getClusterName()); this.serviceDiscoveryAnnouncers = serviceDiscoveryAnnouncers; this.accessController = accessController; @@ -409,13 +436,23 @@ public RouterServer( List serviceDiscoveryAnnouncers, Optional sslFactory, HelixLiveInstanceMonitor liveInstanceMonitor) { - this(properties, serviceDiscoveryAnnouncers, Optional.empty(), sslFactory, new MetricsRepository(), false); + this( + properties, + serviceDiscoveryAnnouncers, + Optional.empty(), + sslFactory, + new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName("venice-router") + .extractAndSetOtelConfigs(properties.getPropsMap()) + .build()), + false); this.routingDataRepository = routingDataRepository; this.hybridStoreQuotaRepository = hybridStoreQuotaRepository; this.metadataRepository = metadataRepository; this.routerStats = new RouterStats<>( requestType -> new AggRouterHttpRequestStats( metricsRepository, + config.getClusterName(), requestType, config.isKeyValueProfilingEnabled(), metadataRepository, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java index 58cf615980..5c79e02630 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java @@ -154,7 +154,7 @@ private static void metricTracking( // If we don't know the actual store name, this error will only be aggregated in server level, but not // in store level if (responseStatus.equals(BAD_REQUEST) || responseStatus.equals(REQUEST_ENTITY_TOO_LARGE)) { - 
stats.recordBadRequest(storeName.orElse(null)); + stats.recordBadRequest(storeName.orElse(null), responseStatus); } else if (responseStatus.equals(TOO_MANY_REQUESTS)) { if (storeName.isPresent()) { if (requestType.isPresent()) { @@ -165,7 +165,7 @@ private static void metricTracking( * * TODO: Remove this metric after the above work is done... */ - stats.recordThrottledRequest(storeName.get()); + stats.recordThrottledRequest(storeName.get(), responseStatus); } } else { // not possible to have empty store name in this scenario @@ -198,7 +198,7 @@ private static void metricTracking( return; } - stats.recordUnhealthyRequest(storeName.orElse(null)); + stats.recordUnhealthyRequest(storeName.orElse(null), responseStatus); if (responseStatus.equals(SERVICE_UNAVAILABLE)) { if (storeName.isPresent()) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java index cf75f003e4..0a3ccefd44 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java @@ -30,6 +30,7 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.streaming.VeniceChunkedResponse; import com.linkedin.venice.router.throttle.PendingRequestThrottler; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.LatencyUtils; import com.linkedin.venice.utils.Pair; import com.linkedin.venice.utils.Utils; @@ -40,7 +41,6 @@ import io.netty.handler.codec.http.HttpHeaderNames; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.util.Collections; import java.util.List; @@ -95,7 +95,7 @@ public VeniceDispatcher( VeniceRouterConfig config, 
ReadOnlyStoreRepository storeRepository, RouterStats perStoreStatsByType, - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, StorageNodeClient storageNodeClient, RouteHttpRequestStats routeHttpRequestStats, AggHostHealthStats aggHostHealthStats, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java index 5f1e81b68f..199b5a548a 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java @@ -34,11 +34,11 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.streaming.VeniceChunkedWriteHandler; import com.linkedin.venice.router.utils.VeniceRouterUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.streaming.StreamingUtils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.channel.ChannelHandlerContext; import io.netty.handler.codec.http.HttpResponseStatus; -import io.tehuti.metrics.MetricsRepository; import java.util.Collection; import java.util.Collections; import java.util.Map; @@ -114,7 +114,7 @@ public class VenicePathParser private final ReadOnlyStoreRepository storeRepository; private final VeniceRouterConfig routerConfig; private final CompressorFactory compressorFactory; - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final ScheduledExecutorService retryManagerScheduler; private final Map routerSingleKeyRetryManagers; private final Map routerMultiKeyRetryManagers; @@ -134,7 +134,7 @@ public VenicePathParser( ReadOnlyStoreRepository storeRepository, VeniceRouterConfig routerConfig, CompressorFactory compressorFactory, - MetricsRepository metricsRepository, + 
VeniceMetricsRepository metricsRepository, ScheduledExecutorService retryManagerScheduler) { this.versionFinder = versionFinder; this.partitionFinder = partitionFinder; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java index a883dc0970..1275ff4c01 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java @@ -230,7 +230,7 @@ public FullHttpResponse buildResponse( } } - HttpResponseStatus responseStatus = finalResponse.status(); + HttpResponseStatus httpResponseStatus = finalResponse.status(); Map allMetrics = metrics.getMetrics(); /** * All the metrics in {@link com.linkedin.ddsstorage.router.api.MetricNames} are supported in {@link Metrics}. @@ -243,21 +243,20 @@ public FullHttpResponse buildResponse( // TODO: When a batch get throws a quota exception, the ROUTER_SERVER_TIME is missing, so we can't record anything // here... 
double latency = LatencyUtils.convertNSToMS(timeValue.getRawValue(TimeUnit.NANOSECONDS)); - stats.recordLatency(storeName, latency); - if (HEALTHY_STATUSES.contains(responseStatus)) { + if (HEALTHY_STATUSES.contains(httpResponseStatus)) { routerStats.getStatsByType(RequestType.SINGLE_GET) .recordReadQuotaUsage(storeName, venicePath.getPartitionKeys().size()); if (isFastRequest(latency, requestType)) { - stats.recordHealthyRequest(storeName, latency); + stats.recordHealthyRequest(storeName, latency, httpResponseStatus); } else { - stats.recordTardyRequest(storeName, latency); + stats.recordTardyRequest(storeName, latency, httpResponseStatus); } - } else if (responseStatus.equals(TOO_MANY_REQUESTS)) { + } else if (httpResponseStatus.equals(TOO_MANY_REQUESTS)) { LOGGER.debug("request is rejected by storage node because quota is exceeded"); - stats.recordThrottledRequest(storeName, latency); + stats.recordThrottledRequest(storeName, latency, httpResponseStatus); } else { - LOGGER.debug("Unhealthy request detected, latency: {}ms, response status: {}", latency, responseStatus); - stats.recordUnhealthyRequest(storeName, latency); + LOGGER.debug("Unhealthy request detected, latency: {}ms, response status: {}", latency, httpResponseStatus); + stats.recordUnhealthyRequest(storeName, latency, httpResponseStatus); } } timeValue = allMetrics.get(ROUTER_RESPONSE_WAIT_TIME); @@ -275,7 +274,7 @@ public FullHttpResponse buildResponse( double routingTime = LatencyUtils.convertNSToMS(timeValue.getRawValue(TimeUnit.NANOSECONDS)); stats.recordRequestRoutingLatency(storeName, routingTime); } - if (HEALTHY_STATUSES.contains(responseStatus) && !venicePath.isStreamingRequest()) { + if (HEALTHY_STATUSES.contains(httpResponseStatus) && !venicePath.isStreamingRequest()) { // Only record successful response stats.recordResponseSize(storeName, finalResponse.content().readableBytes()); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java 
b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java index e8670193a0..a40e5c0e6e 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java @@ -19,9 +19,9 @@ import com.linkedin.venice.router.stats.RouterCurrentVersionStats; import com.linkedin.venice.router.stats.StaleVersionReason; import com.linkedin.venice.router.stats.StaleVersionStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.RedundantExceptionFilter; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import java.util.List; import java.util.Map; import java.util.Optional; @@ -51,7 +51,7 @@ public class VeniceVersionFinder { private final HelixBaseRoutingRepository routingDataRepository; private final CompressorFactory compressorFactory; - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; public VeniceVersionFinder( ReadOnlyStoreRepository metadataRepository, @@ -61,7 +61,7 @@ public VeniceVersionFinder( Map clusterToD2Map, String clusterName, CompressorFactory compressorFactory, - MetricsRepository metricsRepository) { + VeniceMetricsRepository metricsRepository) { this.metadataRepository = metadataRepository; this.routingDataRepository = routingDataRepository; this.stats = stats; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java index c36023b87b..452d3f7299 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java +++ 
b/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java @@ -4,7 +4,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.helix.HelixInstanceConfigRepository; import com.linkedin.venice.router.stats.HelixGroupStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import java.util.concurrent.TimeUnit; @@ -26,7 +26,7 @@ public class HelixGroupSelector implements HelixGroupSelectionStrategy { private final HelixGroupStats helixGroupStats; public HelixGroupSelector( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, HelixInstanceConfigRepository instanceConfigRepository, HelixGroupSelectionStrategyEnum strategyEnum, TimeoutProcessor timeoutProcessor) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java b/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java index c6f1395c0f..40479dd457 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java @@ -14,12 +14,12 @@ import com.linkedin.venice.service.AbstractVeniceService; import com.linkedin.venice.stats.DnsLookupStats; import com.linkedin.venice.stats.HttpConnectionPoolStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.DaemonThreadFactory; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; -import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -83,7 +83,7 @@ public class 
ApacheHttpAsyncStorageNodeClient implements StorageNodeClient { public ApacheHttpAsyncStorageNodeClient( VeniceRouterConfig config, Optional sslFactory, - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, LiveInstanceMonitor monitor) { int totalIOThreadNum = config.getIoThreadCountInPoolMode(); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java index aeeed721e3..675177df66 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java @@ -2,7 +2,7 @@ import com.linkedin.venice.router.VeniceRouterConfig; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Count; @@ -12,7 +12,7 @@ public class AdminOperationsStats extends AbstractVeniceStats { private final Sensor adminRequestSensor; private final Sensor errorAdminRequestSensor; - public AdminOperationsStats(MetricsRepository metricsRepository, String name, VeniceRouterConfig config) { + public AdminOperationsStats(VeniceMetricsRepository metricsRepository, String name, VeniceRouterConfig config) { super(metricsRepository, name); adminRequestSensor = registerSensorIfAbsent("admin_request", new Count()); errorAdminRequestSensor = registerSensorIfAbsent("error_admin_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java index cc72755409..61506493cd 100644 --- 
a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java @@ -2,18 +2,15 @@ import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.StatsUtils; -import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; -import java.util.Map; +import com.linkedin.venice.stats.VeniceMetricsRepository; public class AggHostHealthStats extends AbstractVeniceAggStats { - private final Map hostHealthStatsMap = new VeniceConcurrentHashMap<>(); - - public AggHostHealthStats(MetricsRepository metricsRepository) { + public AggHostHealthStats(VeniceMetricsRepository metricsRepository, String clusterName) { super( + (repo, hostName, cluster) -> new HostHealthStats(repo, StatsUtils.convertHostnameToMetricName(hostName)), metricsRepository, - (repo, hostName) -> new HostHealthStats(repo, StatsUtils.convertHostnameToMetricName(hostName))); + clusterName); } private HostHealthStats getHostStats(String hostName) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java index 517f1485e7..e39b072a19 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java @@ -5,8 +5,9 @@ import com.linkedin.venice.read.RequestType; import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; +import io.netty.handler.codec.http.HttpResponseStatus; 
import java.util.Map; import java.util.function.Function; @@ -15,25 +16,33 @@ public class AggRouterHttpRequestStats extends AbstractVeniceAggStoreStats scatterGatherStatsMap = new VeniceConcurrentHashMap<>(); public AggRouterHttpRequestStats( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, + String clusterName, RequestType requestType, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - this(metricsRepository, requestType, false, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); + this( + metricsRepository, + clusterName, + requestType, + false, + metadataRepository, + isUnregisterMetricForDeletedStoreEnabled); } public AggRouterHttpRequestStats( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, + String cluster, RequestType requestType, boolean isKeyValueProfilingEnabled, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); + super(metricsRepository, cluster, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); /** * Use a setter function to bypass the restriction that the supertype constructor could not * touch member fields of current object. 
*/ - setStatsSupplier((metricsRepo, storeName) -> { + setStatsSupplier((metricsRepo, storeName, clusterName) -> { ScatterGatherStats stats; if (storeName.equals(AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT)) { stats = new AggScatterGatherStats(); @@ -41,7 +50,13 @@ public AggRouterHttpRequestStats( stats = scatterGatherStatsMap.computeIfAbsent(storeName, k -> new ScatterGatherStats()); } - return new RouterHttpRequestStats(metricsRepo, storeName, requestType, stats, isKeyValueProfilingEnabled); + return new RouterHttpRequestStats( + metricsRepo, + storeName, + clusterName, + requestType, + stats, + isKeyValueProfilingEnabled); }); } @@ -50,19 +65,19 @@ public ScatterGatherStats getScatterGatherStatsForStore(String storeName) { } public void recordRequest(String storeName) { - totalStats.recordRequest(); - getStoreStats(storeName).recordRequest(); + totalStats.recordIncomingRequest(); + getStoreStats(storeName).recordIncomingRequest(); } - public void recordHealthyRequest(String storeName, double latency) { - totalStats.recordHealthyRequest(latency); - getStoreStats(storeName).recordHealthyRequest(latency); + public void recordHealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus) { + totalStats.recordHealthyRequest(latency, responseStatus); + getStoreStats(storeName).recordHealthyRequest(latency, responseStatus); } - public void recordUnhealthyRequest(String storeName) { - totalStats.recordUnhealthyRequest(); + public void recordUnhealthyRequest(String storeName, HttpResponseStatus responseStatus) { + totalStats.recordUnhealthyRequest(responseStatus); if (storeName != null) { - getStoreStats(storeName).recordUnhealthyRequest(); + getStoreStats(storeName).recordUnhealthyRequest(responseStatus); } } @@ -71,10 +86,10 @@ public void recordUnavailableReplicaStreamingRequest(String storeName) { getStoreStats(storeName).recordUnavailableReplicaStreamingRequest(); } - public void recordUnhealthyRequest(String storeName, double latency) { - 
totalStats.recordUnhealthyRequest(latency); + public void recordUnhealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus) { + totalStats.recordUnhealthyRequest(latency, responseStatus); if (storeName != null) { - getStoreStats(storeName).recordUnhealthyRequest(latency); + getStoreStats(storeName).recordUnhealthyRequest(latency, responseStatus); } } @@ -89,9 +104,9 @@ public void recordReadQuotaUsage(String storeName, int quotaUsage) { getStoreStats(storeName).recordReadQuotaUsage(quotaUsage); } - public void recordTardyRequest(String storeName, double latency) { - totalStats.recordTardyRequest(latency); - getStoreStats(storeName).recordTardyRequest(latency); + public void recordTardyRequest(String storeName, double latency, HttpResponseStatus responseStatus) { + totalStats.recordTardyRequest(latency, responseStatus); + getStoreStats(storeName).recordTardyRequest(latency, responseStatus); } /** @@ -101,20 +116,20 @@ public void recordTardyRequest(String storeName, double latency) { * * TODO: Remove this overload after fixing the above. 
*/ - public void recordThrottledRequest(String storeName) { - totalStats.recordThrottledRequest(); - getStoreStats(storeName).recordThrottledRequest(); + public void recordThrottledRequest(String storeName, HttpResponseStatus httpResponseStatus) { + totalStats.recordThrottledRequest(httpResponseStatus); + getStoreStats(storeName).recordThrottledRequest(httpResponseStatus); } - public void recordThrottledRequest(String storeName, double latency) { - totalStats.recordThrottledRequest(latency); - getStoreStats(storeName).recordThrottledRequest(latency); + public void recordThrottledRequest(String storeName, double latency, HttpResponseStatus httpResponseStatus) { + totalStats.recordThrottledRequest(latency, httpResponseStatus); + getStoreStats(storeName).recordThrottledRequest(latency, httpResponseStatus); } - public void recordBadRequest(String storeName) { - totalStats.recordBadRequest(); + public void recordBadRequest(String storeName, HttpResponseStatus responseStatus) { + totalStats.recordBadRequest(responseStatus); if (storeName != null) { - getStoreStats(storeName).recordBadRequest(); + getStoreStats(storeName).recordBadRequest(responseStatus); } } @@ -146,7 +161,9 @@ public void recordFanoutRequestCount(String storeName, int count) { public void recordLatency(String storeName, double latency) { totalStats.recordLatency(latency); - getStoreStats(storeName).recordLatency(latency); + if (storeName != null) { + getStoreStats(storeName).recordLatency(latency); + } } public void recordResponseWaitingTime(String storeName, double waitingTime) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java index 53a269c74b..55d95dff70 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java 
@@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; @@ -10,7 +10,7 @@ public class HealthCheckStats extends AbstractVeniceStats { private final Sensor healthCheckRequestSensor; private final Sensor errorHealthCheckRequestSensor; - public HealthCheckStats(MetricsRepository metricsRepository, String name) { + public HealthCheckStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); healthCheckRequestSensor = registerSensor("healthcheck_request", new Count()); errorHealthCheckRequestSensor = registerSensor("error_healthcheck_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java index a248de31cf..70e7e7f8ec 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java @@ -2,8 +2,8 @@ import com.linkedin.venice.router.api.routing.helix.HelixGroupSelectionStrategy; import com.linkedin.venice.stats.AbstractVeniceStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Avg; @@ -19,7 +19,7 @@ public class HelixGroupStats extends AbstractVeniceStats { private final Sensor minGroupPendingRequest; private final Sensor avgGroupPendingRequest; - public HelixGroupStats(MetricsRepository metricsRepository, HelixGroupSelectionStrategy strategy) { + public 
HelixGroupStats(VeniceMetricsRepository metricsRepository, HelixGroupSelectionStrategy strategy) { super(metricsRepository, "HelixGroupStats"); this.strategy = strategy; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java index 7b9d1b203b..8d8eafe4e6 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java @@ -2,7 +2,7 @@ import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Count; @@ -28,7 +28,7 @@ public class HostHealthStats extends AbstractVeniceStats { private Optional unhealthyHostCountCausedByPendingQueueSensor = Optional.empty(); private Optional unhealthyHostCountCausedByHeartBeatSensor = Optional.empty(); - public HostHealthStats(MetricsRepository metricsRepository, String name) { + public HostHealthStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); this.unhealthyHostOfflineInstance = registerSensor("unhealthy_host_offline_instance", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java index e203fc4d89..6c999fd1df 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java @@ -3,8 +3,8 @@ import com.linkedin.venice.router.httpclient.StorageNodeClient; 
import com.linkedin.venice.stats.AbstractVeniceStats; import com.linkedin.venice.stats.StatsUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Max; @@ -20,11 +20,11 @@ * to {@link RouteHttpStats} which stores only per type stats. */ public class RouteHttpRequestStats { - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final StorageNodeClient storageNodeClient; private final Map routeStatsMap = new VeniceConcurrentHashMap<>(); - public RouteHttpRequestStats(MetricsRepository metricsRepository, StorageNodeClient storageNodeClient) { + public RouteHttpRequestStats(VeniceMetricsRepository metricsRepository, StorageNodeClient storageNodeClient) { this.metricsRepository = metricsRepository; this.storageNodeClient = storageNodeClient; } @@ -58,7 +58,7 @@ static class InternalHostStats extends AbstractVeniceStats { private final Sensor unhealthyPendingRateSensor; private AtomicLong pendingRequestCount; - public InternalHostStats(MetricsRepository metricsRepository, String hostName) { + public InternalHostStats(VeniceMetricsRepository metricsRepository, String hostName) { super(metricsRepository, StatsUtils.convertHostnameToMetricName(hostName)); pendingRequestCount = new AtomicLong(); // pendingRequestCountSensor = diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java index 90bc9f94c4..79de9c6892 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java @@ -3,8 +3,8 @@ import 
com.linkedin.venice.read.RequestType; import com.linkedin.venice.stats.AbstractVeniceHttpStats; import com.linkedin.venice.stats.TehutiUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; import io.tehuti.metrics.stats.Max; @@ -13,10 +13,10 @@ public class RouteHttpStats { private final Map routeStatsMap = new VeniceConcurrentHashMap<>(); - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final RequestType requestType; - public RouteHttpStats(MetricsRepository metricsRepository, RequestType requestType) { + public RouteHttpStats(VeniceMetricsRepository metricsRepository, RequestType requestType) { this.metricsRepository = metricsRepository; this.requestType = requestType; } @@ -31,7 +31,7 @@ static class InternalRouteHttpStats extends AbstractVeniceHttpStats { private final Sensor responseWaitingTimeSensor; private final Sensor requestSensor; - public InternalRouteHttpStats(MetricsRepository metricsRepository, String hostName, RequestType requestType) { + public InternalRouteHttpStats(VeniceMetricsRepository metricsRepository, String hostName, RequestType requestType) { super(metricsRepository, hostName.replace('.', '_'), requestType); requestSensor = registerSensor("request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java index 35dd99e619..57965d6b06 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java @@ -1,7 +1,7 @@ package 
com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Gauge; @@ -9,7 +9,7 @@ public class RouterCurrentVersionStats extends AbstractVeniceStats { private final Sensor currentVersionNumberSensor; - public RouterCurrentVersionStats(MetricsRepository metricsRepository, String name) { + public RouterCurrentVersionStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); this.currentVersionNumberSensor = registerSensor("current_version", new Gauge(-1)); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index d53abc2177..9927ecbbd4 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -1,6 +1,15 @@ package com.linkedin.venice.router.stats; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE; +import static 
com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; import com.linkedin.alpini.router.monitoring.ScatterGatherStats; import com.linkedin.venice.common.VeniceSystemStoreUtils; @@ -8,10 +17,22 @@ import com.linkedin.venice.stats.AbstractVeniceHttpStats; import com.linkedin.venice.stats.LambdaStat; import com.linkedin.venice.stats.TehutiUtils; +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceMetricsRepository; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; +import com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory; +import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import com.linkedin.venice.stats.dimensions.VeniceRequestRetryAbortReason; +import com.linkedin.venice.stats.dimensions.VeniceRequestRetryType; +import com.linkedin.venice.stats.dimensions.VeniceRequestValidationOutcome; +import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; import io.tehuti.Metric; import io.tehuti.metrics.MeasurableStat; import io.tehuti.metrics.MetricConfig; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Count; @@ -27,92 +48,161 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private static final MetricConfig METRIC_CONFIG = new MetricConfig().timeWindow(10, TimeUnit.SECONDS); - private static final MetricsRepository localMetricRepo = new MetricsRepository(METRIC_CONFIG); + private static 
final VeniceMetricsRepository localMetricRepo = new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setTehutiMetricConfig(METRIC_CONFIG).build()); private final static Sensor totalInflightRequestSensor = localMetricRepo.sensor("total_inflight_request"); static { totalInflightRequestSensor.add("total_inflight_request_count", new Rate()); } - private final Sensor requestSensor; + + /** metrics to track incoming requests */ + private final Sensor incomingRequestSensor; + private final LongCounter incomingRequestSensorOtel; + + /** metrics to track response handling */ private final Sensor healthySensor; private final Sensor unhealthySensor; private final Sensor tardySensor; private final Sensor healthyRequestRateSensor; private final Sensor tardyRequestRatioSensor; private final Sensor throttleSensor; - private final Sensor errorRetryCountSensor; + private final Sensor badRequestSensor; + private final LongCounter requestSensorOtel; + /** latency metrics */ private final Sensor latencySensor; private final Sensor healthyRequestLatencySensor; private final Sensor unhealthyRequestLatencySensor; private final Sensor tardyRequestLatencySensor; private final Sensor throttledRequestLatencySensor; + private final DoubleHistogram latencySensorOtel; + + /** retry metrics */ + private final Sensor errorRetryCountSensor; + private final LongCounter retryTriggeredSensorOtel; + private final Sensor allowedRetryRequestSensor; + private final LongCounter allowedRetryRequestSensorOtel; + private final Sensor disallowedRetryRequestSensor; + private final LongCounter disallowedRetryRequestSensorOtel; + private final Sensor retryDelaySensor; + private final DoubleHistogram retryDelaySensorOtel; + + /** retry aborted metrics */ + private final Sensor delayConstraintAbortedRetryRequest; + private final Sensor slowRouteAbortedRetryRequest; + private final Sensor retryRouteLimitAbortedRetryRequest; + private final Sensor noAvailableReplicaAbortedRetryRequest; 
+ private final LongCounter abortedRetrySensorOtel; + + /** key count metrics */ + private final Sensor keyNumSensor; + private final Sensor badRequestKeyCountSensor; + private final DoubleHistogram keyCountSensorOtel; + + /** OTel metrics yet to be added */ private final Sensor requestSizeSensor; private final Sensor compressedResponseSizeSensor; private final Sensor responseSizeSensor; - private final Sensor badRequestSensor; - private final Sensor badRequestKeyCountSensor; private final Sensor requestThrottledByRouterCapacitySensor; private final Sensor decompressionTimeSensor; private final Sensor routerResponseWaitingTimeSensor; private final Sensor fanoutRequestCountSensor; private final Sensor quotaSensor; private final Sensor findUnhealthyHostRequestSensor; - private final Sensor keyNumSensor; // Reflect the real request usage, e.g count each key as an unit of request usage. private final Sensor requestUsageSensor; private final Sensor requestParsingLatencySensor; private final Sensor requestRoutingLatencySensor; private final Sensor unAvailableRequestSensor; - private final Sensor delayConstraintAbortedRetryRequest; - private final Sensor slowRouteAbortedRetryRequest; - private final Sensor retryRouteLimitAbortedRetryRequest; - private final Sensor noAvailableReplicaAbortedRetryRequest; private final Sensor readQuotaUsageSensor; private final Sensor inFlightRequestSensor; private final AtomicInteger currentInFlightRequest; private final Sensor unavailableReplicaStreamingRequestSensor; - private final Sensor allowedRetryRequestSensor; - private final Sensor disallowedRetryRequestSensor; - private final Sensor errorRetryAttemptTriggeredByPendingRequestCheckSensor; - private final Sensor retryDelaySensor; private final Sensor multiGetFallbackSensor; private final Sensor metaStoreShadowReadSensor; private Sensor keySizeSensor; + + /** TODO: Need to clarify the usage and add new OTel metrics or add it as a part of existing ones */ + private final Sensor 
errorRetryAttemptTriggeredByPendingRequestCheckSensor; + private final String systemStoreName; + private final Attributes otelMetricDimensions; + private final boolean emitOpenTelemetryMetrics; + private final VeniceOpenTelemetryMetricFormat openTelemetryMetricFormat; // QPS metrics public RouterHttpRequestStats( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, String storeName, + String clusterName, RequestType requestType, ScatterGatherStats scatterGatherStats, boolean isKeyValueProfilingEnabled) { super(metricsRepository, storeName, requestType); + emitOpenTelemetryMetrics = metricsRepository.getVeniceMetricsConfig().isEmitOpenTelemetryMetrics(); + openTelemetryMetricFormat = metricsRepository.getVeniceMetricsConfig().getMetricFormat(); + otelMetricDimensions = Attributes.builder() + .put(getDimensionName(VENICE_STORE_NAME), storeName) + .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.getRequestTypeName()) + .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName) + .build(); + this.systemStoreName = VeniceSystemStoreUtils.extractSystemStoreType(storeName); Rate requestRate = new OccurrenceRate(); Rate healthyRequestRate = new OccurrenceRate(); Rate tardyRequestRate = new OccurrenceRate(); - requestSensor = registerSensor("request", new Count(), requestRate); + + incomingRequestSensor = registerSensor("request", new Count(), requestRate); + incomingRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("incoming_call_count", "Number", "Count of all incoming requests"); + healthySensor = registerSensor("healthy_request", new Count(), healthyRequestRate); unhealthySensor = registerSensor("unhealthy_request", new Count()); - unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); tardySensor = registerSensor("tardy_request", new Count(), tardyRequestRate); + throttleSensor = registerSensor("throttled_request", new Count()); 
healthyRequestRateSensor = registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "healthy_request_ratio")); tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); - throttleSensor = registerSensor("throttled_request", new Count()); - errorRetryCountSensor = registerSensor("error_retry", new Count()); badRequestSensor = registerSensor("bad_request", new Count()); - badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); + requestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("call_count", "Number", "Count of all requests with response details"); + + errorRetryCountSensor = registerSensor("error_retry", new Count()); + retryTriggeredSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("retry_call_count", "Number", "Count of retries triggered"); + allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); + allowedRetryRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("allowed_retry_call_count", "Number", "Count of allowed retry requests"); + disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); + disallowedRetryRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("disallowed_retry_call_count", "Number", "Count of disallowed retry requests"); + errorRetryAttemptTriggeredByPendingRequestCheckSensor = + registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); + retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); + retryDelaySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getHistogramWithoutBuckets("retry_delay", TimeUnit.MILLISECONDS.name(), "Retry delay time"); + + delayConstraintAbortedRetryRequest = 
registerSensor("delay_constraint_aborted_retry_request", new Count()); + slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); + retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); + noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); + abortedRetrySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("aborted_retry_call_count", "Number", "Count of aborted retry requests"); + + unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); requestThrottledByRouterCapacitySensor = registerSensor("request_throttled_by_router_capacity", new Count()); fanoutRequestCountSensor = registerSensor("fanout_request_count", new Avg(), new Max(0)); + latencySensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0)); healthyRequestLatencySensor = registerSensorWithDetailedPercentiles("healthy_request_latency", new Avg(), new Max(0)); unhealthyRequestLatencySensor = registerSensor("unhealthy_request_latency", new Avg(), new Max(0)); tardyRequestLatencySensor = registerSensor("tardy_request_latency", new Avg(), new Max(0)); throttledRequestLatencySensor = registerSensor("throttled_request_latency", new Avg(), new Max(0)); + latencySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getHistogram("call_time", TimeUnit.MILLISECONDS.name(), "Latency based on all responses"); + routerResponseWaitingTimeSensor = registerSensor( "response_waiting_time", TehutiUtils.getPercentileStat(getName(), getFullMetricName("response_waiting_time"))); @@ -147,6 +237,10 @@ public RouterHttpRequestStats( "retry_faster_than_original_count")); keyNumSensor = registerSensor("key_num", new Avg(), new Max(0)); + badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); + 
keyCountSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getHistogramWithoutBuckets("call_key_count", "Number", "Count of keys in multi key requests"); + /** * request_usage.Total is incoming KPS while request_usage.OccurrenceRate is QPS */ @@ -158,11 +252,6 @@ public RouterHttpRequestStats( unAvailableRequestSensor = registerSensor("unavailable_request", new Count()); - delayConstraintAbortedRetryRequest = registerSensor("delay_constraint_aborted_retry_request", new Count()); - slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); - retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); - noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); - readQuotaUsageSensor = registerSensor("read_quota_usage_kps", new Total()); inFlightRequestSensor = registerSensor("in_flight_request_count", new Min(), new Max(0), new Avg()); @@ -189,42 +278,48 @@ public RouterHttpRequestStats( } currentInFlightRequest = new AtomicInteger(); - allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); - disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); - errorRetryAttemptTriggeredByPendingRequestCheckSensor = - registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); - retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); metaStoreShadowReadSensor = registerSensor("meta_store_shadow_read", new OccurrenceRate()); } + private String getDimensionName(VeniceMetricsDimensions dimension) { + return dimension.getDimensionName(openTelemetryMetricFormat); + } + /** * We record this at the beginning of request handling, so we don't know the latency yet... All specific * types of requests also have their latencies logged at the same time. 
*/ - public void recordRequest() { - requestSensor.record(); + public void recordIncomingRequest() { + incomingRequestSensor.record(); inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet()); totalInflightRequestSensor.record(); + if (emitOpenTelemetryMetrics) { + incomingRequestSensorOtel.add(1, otelMetricDimensions); + } } - public void recordHealthyRequest(Double latency) { + public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) { healthySensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.HEALTHY); if (latency != null) { healthyRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.HEALTHY); } } - public void recordUnhealthyRequest() { + public void recordUnhealthyRequest(HttpResponseStatus responseStatus) { unhealthySensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.UNHEALTHY); } - public void recordUnavailableReplicaStreamingRequest() { - unavailableReplicaStreamingRequestSensor.record(); + public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus) { + recordUnhealthyRequest(responseStatus); + unhealthyRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.UNHEALTHY); } - public void recordUnhealthyRequest(double latency) { - recordUnhealthyRequest(); - unhealthyRequestLatencySensor.record(latency); + public void recordUnavailableReplicaStreamingRequest() { + unavailableReplicaStreamingRequestSensor.record(); } /** @@ -235,14 +330,17 @@ public void recordReadQuotaUsage(int quotaUsage) { readQuotaUsageSensor.record(quotaUsage); } - public void recordTardyRequest(double latency) { + public void recordTardyRequest(double latency, HttpResponseStatus responseStatus) { tardySensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.TARDY); 
tardyRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.TARDY); } - public void recordThrottledRequest(double latency) { - recordThrottledRequest(); + public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus) { + recordThrottledRequest(responseStatus); throttledRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.THROTTLED); } /** @@ -252,20 +350,46 @@ public void recordThrottledRequest(double latency) { * * TODO: Remove this overload after fixing the above. */ - public void recordThrottledRequest() { + public void recordThrottledRequest(HttpResponseStatus responseStatus) { throttleSensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.THROTTLED); } public void recordErrorRetryCount() { errorRetryCountSensor.record(); + recordRetryTriggeredSensorOtel(VeniceRequestRetryType.ERROR_RETRY); + } + + public void recordRetryTriggeredSensorOtel(VeniceRequestRetryType retryType) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) + .build(); + retryTriggeredSensorOtel.add(1, dimensions); + } + } + + public void recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason abortReason) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason()) + .build(); + abortedRetrySensorOtel.add(1, dimensions); + } } - public void recordBadRequest() { + public void recordBadRequest(HttpResponseStatus responseStatus) { badRequestSensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.BAD_REQUEST); } public void recordBadRequestKeyCount(int keyCount) { 
badRequestKeyCountSensor.record(keyCount); + if (emitOpenTelemetryMetrics) { + recordKeyCountSensorOtel(keyCount, VeniceRequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED); + } } public void recordRequestThrottledByRouterCapacity() { @@ -282,6 +406,39 @@ public void recordLatency(double latency) { latencySensor.record(latency); } + public void recordLatencySensorOtel( + double latency, + HttpResponseStatus responseStatus, + VeniceResponseStatusCategory veniceResponseStatusCategory) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + // only add HTTP_RESPONSE_STATUS_CODE_CATEGORY to reduce the cardinality for histogram + .put( + getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), + VeniceHttpResponseStatusCodeCategory.valueOf(responseStatus.code()).getCategory()) + .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) + .build(); + latencySensorOtel.record(latency, dimensions); + } + } + + public void recordRequestSensorOtel( + HttpResponseStatus responseStatus, + VeniceResponseStatusCategory veniceResponseStatusCategory) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put( + getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), + VeniceHttpResponseStatusCodeCategory.valueOf(responseStatus.code()).getCategory()) + .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) + .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) + .build(); + requestSensorOtel.add(1, dimensions); + } + } + public void recordResponseWaitingTime(double waitingTime) { routerResponseWaitingTimeSensor.record(waitingTime); } @@ -312,6 +469,20 @@ public void recordFindUnhealthyHostRequest() { public void recordKeyNum(int keyNum) { keyNumSensor.record(keyNum); + if (emitOpenTelemetryMetrics) { + 
recordKeyCountSensorOtel(keyNum, VeniceRequestValidationOutcome.VALID); + } + } + + public void recordKeyCountSensorOtel(int keyNum, VeniceRequestValidationOutcome outcome) { + keyNumSensor.record(keyNum); + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome()) + .build(); + keyCountSensorOtel.record(keyNum, dimensions); + } } public void recordRequestUsage(int usage) { @@ -336,18 +507,22 @@ public void recordUnavailableRequest() { public void recordDelayConstraintAbortedRetryRequest() { delayConstraintAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_DELAY_CONSTRAINT); } public void recordSlowRouteAbortedRetryRequest() { slowRouteAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_SLOW_ROUTE); } public void recordRetryRouteLimitAbortedRetryRequest() { retryRouteLimitAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT); } public void recordNoAvailableReplicaAbortedRetryRequest() { noAvailableReplicaAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA); } public void recordKeySizeInByte(long keySize) { @@ -358,7 +533,7 @@ public void recordKeySizeInByte(long keySize) { public void recordResponse() { /** - * We already report into the sensor when the request starts, in {@link #recordRequest()}, so at response time + * We already report into the sensor when the request starts, in {@link #recordIncomingRequest()}, so at response time * there is no need to record into the sensor again. We just want to maintain the bookkeeping. 
*/ currentInFlightRequest.decrementAndGet(); @@ -367,10 +542,12 @@ public void recordResponse() { public void recordAllowedRetryRequest() { allowedRetryRequestSensor.record(); + allowedRetryRequestSensorOtel.add(1, otelMetricDimensions); } public void recordDisallowedRetryRequest() { disallowedRetryRequestSensor.record(); + disallowedRetryRequestSensorOtel.add(1, otelMetricDimensions); } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { @@ -379,6 +556,9 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { public void recordRetryDelay(double delay) { retryDelaySensor.record(delay); + if (emitOpenTelemetryMetrics) { + retryDelaySensorOtel.record(delay, otelMetricDimensions); + } } public void recordMetaStoreShadowRead() { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java index 028a5c728a..42fbbd4b74 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java @@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; @@ -9,7 +9,7 @@ public class RouterThrottleStats extends AbstractVeniceStats { private final Sensor routerThrottleSensor; - public RouterThrottleStats(MetricsRepository repository, String name) { + public RouterThrottleStats(VeniceMetricsRepository repository, String name) { super(repository, name); routerThrottleSensor = registerSensor("router_throttled_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java 
b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java index 596a6564b0..14347ccc5d 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java @@ -2,7 +2,7 @@ import com.linkedin.alpini.netty4.ssl.SslInitializer; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Avg; @@ -19,7 +19,7 @@ public class SecurityStats extends AbstractVeniceStats { private final Sensor sslLiveConnectionCount; private final Sensor nonSslConnectionCount; - public SecurityStats(MetricsRepository repository, String name, IntSupplier secureConnectionCountSupplier) { + public SecurityStats(VeniceMetricsRepository repository, String name, IntSupplier secureConnectionCountSupplier) { super(repository, name); this.secureConnectionCountSupplier = secureConnectionCountSupplier; this.sslErrorCount = registerSensor("ssl_error", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java index 21ce6805c0..693de6fa14 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java @@ -1,8 +1,8 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Max; 
import io.tehuti.metrics.stats.OccurrenceRate; @@ -13,7 +13,7 @@ public class StaleVersionStats extends AbstractVeniceStats { private final VeniceConcurrentHashMap staleVersionReasonStats = new VeniceConcurrentHashMap<>(); - public StaleVersionStats(MetricsRepository metricsRepository, String name) { + public StaleVersionStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); staleVersionStat = registerSensor("stale_version_delta", new Max()); for (StaleVersionReason reason: StaleVersionReason.values()) { diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java index 3490d97483..2af917f1bd 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java @@ -1,10 +1,12 @@ package com.linkedin.venice.router; +import static io.netty.handler.codec.http.HttpResponseStatus.TOO_MANY_REQUESTS; + import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.read.RequestType; import com.linkedin.venice.router.stats.AggRouterHttpRequestStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; -import io.tehuti.metrics.MetricsRepository; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.BeforeSuite; @@ -12,13 +14,13 @@ public class AggRouterHttpRequestStatsTest { - MetricsRepository metricsRepository; + VeniceMetricsRepository metricsRepository; private MockTehutiReporter reporter; private ReadOnlyStoreRepository storeMetadataRepository; @BeforeSuite public void setUp() { - this.metricsRepository = new MetricsRepository(); + this.metricsRepository = new VeniceMetricsRepository(); reporter = new MockTehutiReporter(); 
metricsRepository.addReporter(reporter); storeMetadataRepository = Mockito.mock(ReadOnlyStoreRepository.class); @@ -26,8 +28,12 @@ public void setUp() { @Test public void testAggRouterMetrics() { - AggRouterHttpRequestStats stats = - new AggRouterHttpRequestStats(metricsRepository, RequestType.SINGLE_GET, storeMetadataRepository, true); + AggRouterHttpRequestStats stats = new AggRouterHttpRequestStats( + metricsRepository, + "test-cluster", + RequestType.SINGLE_GET, + storeMetadataRepository, + true); stats.recordRequest("store5"); Assert.assertEquals(reporter.query(".total--request.Count").value(), 1d); @@ -37,8 +43,8 @@ public void testAggRouterMetrics() { Assert.assertNotNull(metricsRepository.getMetric(".store1--request.Count")); Assert.assertEquals(reporter.query(".store1--request.Count").value(), 1d); - stats.recordThrottledRequest("store1", 1.0); - stats.recordThrottledRequest("store2", 1.0); + stats.recordThrottledRequest("store1", 1.0, TOO_MANY_REQUESTS); + stats.recordThrottledRequest("store2", 1.0, TOO_MANY_REQUESTS); stats.recordErrorRetryCount("store1"); Assert.assertEquals(reporter.query(".total--request.Count").value(), 2d); Assert.assertEquals(reporter.query(".store1--request.Count").value(), 1d); @@ -59,8 +65,13 @@ public void testAggRouterMetrics() { @Test public void testProfilingMetrics() { - AggRouterHttpRequestStats stats = - new AggRouterHttpRequestStats(metricsRepository, RequestType.COMPUTE, true, storeMetadataRepository, true); + AggRouterHttpRequestStats stats = new AggRouterHttpRequestStats( + metricsRepository, + "test-cluster", + RequestType.COMPUTE, + true, + storeMetadataRepository, + true); for (int i = 1; i <= 100; i += 1) { stats.recordKeySize("store1", i); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java index a364f4a166..fdfa96bb08 100644 --- 
a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java @@ -7,9 +7,9 @@ import com.linkedin.venice.router.httpclient.StorageNodeClient; import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterHttpRequestStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; -import io.tehuti.metrics.MetricsRepository; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; @@ -22,13 +22,18 @@ public class RouteHttpRequestStatsTest { @BeforeSuite public void setUp() { - MetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedMetricsRepository(); + VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); reporter = new MockTehutiReporter(); metrics.addReporter(reporter); stats = new RouteHttpRequestStats(metrics, mock(StorageNodeClient.class)); - routerHttpRequestStats = - new RouterHttpRequestStats(metrics, "", RequestType.SINGLE_GET, mock(ScatterGatherStats.class), false); + routerHttpRequestStats = new RouterHttpRequestStats( + metrics, + "test-store", + "test-cluster", + RequestType.SINGLE_GET, + mock(ScatterGatherStats.class), + false); } @Test @@ -46,7 +51,7 @@ public void routerMetricsTest() { Assert.assertEquals(stats.getPendingRequestCount("my_host1"), 1); Assert.assertEquals(stats.getPendingRequestCount("my_host2"), 0); - routerHttpRequestStats.recordRequest(); + routerHttpRequestStats.recordIncomingRequest(); Assert.assertTrue(RouterHttpRequestStats.hasInFlightRequests()); } } diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java index e558337e71..01377fc91b 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java @@ -39,10 +39,10 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.throttle.ReadRequestThrottler; import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.HelixUtils; import com.linkedin.venice.utils.Utils; import io.netty.handler.codec.http.HttpMethod; -import io.tehuti.metrics.MetricsRepository; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -225,7 +225,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + "test-cluster", requestType, mock(ReadOnlyStoreRepository.class), true))); @@ -378,7 +379,8 @@ public void testLeastLoadedOnSlowHosts() throws RouterException { config, new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + "test-cluster", requestType, mock(ReadOnlyStoreRepository.class), true)), @@ -702,7 +704,7 @@ public void testScatterForMultiGetWithHelixAssistedRouting() throws RouterExcept doReturn(1).when(helixInstanceConfigRepository).getInstanceGroupId(instance4.getNodeId()); HelixGroupSelector helixGroupSelector = new HelixGroupSelector( - new MetricsRepository(), + new VeniceMetricsRepository(), helixInstanceConfigRepository, HelixGroupSelectionStrategyEnum.ROUND_ROBIN, mock(TimeoutProcessor.class)); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java index b86f169b50..2ab4e77908 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java @@ -34,6 +34,7 @@ import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; import com.linkedin.venice.serializer.RecordSerializer; import com.linkedin.venice.serializer.SerializerDeserializerFactory; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.DefaultHttpHeaders; @@ -41,7 +42,6 @@ import io.netty.handler.codec.http.HttpHeaders; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.nio.ByteBuffer; import java.util.AbstractMap; import java.util.ArrayList; @@ -100,7 +100,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + CLUSTER, requestType, mock(ReadOnlyStoreRepository.class), true))); @@ -128,7 +129,7 @@ public void testParseResourceUri_ComputeRequest() throws RouterException { storeRepository, mock(VeniceRouterConfig.class), mock(CompressorFactory.class), - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)); String storeName = "test-store"; @@ -190,7 +191,7 @@ public void parsesQueries() throws RouterException { mock(ReadOnlyStoreRepository.class), MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)); BasicFullHttpRequest request = new BasicFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, uri, 0, 0); VenicePath path 
= parser.parseResourceUri(uri, request); @@ -221,7 +222,7 @@ public void parsesB64Uri() throws RouterException { mock(ReadOnlyStoreRepository.class), MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)).parseResourceUri(myUri, request); ByteBuffer partitionKey = path.getPartitionKey().getKeyBuffer(); Assert.assertEquals( @@ -242,7 +243,7 @@ public void failsToParseOtherActions() throws RouterException { mock(ReadOnlyStoreRepository.class), MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)).parseResourceUri("/badAction/storeName/key"); } @@ -289,7 +290,7 @@ public void parseRequestWithBatchSizeViolation() throws RouterException { storeRepository, MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)); try { pathParser.parseResourceUri(myUri, request); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java index 3075773715..0d01bea6a2 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java @@ -219,7 +219,7 @@ public void testBuildResponseForMultiGet() { FullHttpResponse response5 = buildFullHttpResponse(TOO_MANY_REQUESTS, new byte[0], headers); metrics.setMetric(MetricNames.ROUTER_SERVER_TIME, new TimeValue(1, TimeUnit.MILLISECONDS)); responseAggregator.buildResponse(request, metrics, Collections.singletonList(response5)); - verify(mockStatsForMultiGet).recordThrottledRequest(storeName, 1.0); + verify(mockStatsForMultiGet).recordThrottledRequest(storeName, 
1.0, TOO_MANY_REQUESTS); } @Test diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java index 436ac12ffe..d70afcd8ac 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java @@ -28,11 +28,11 @@ import com.linkedin.venice.meta.VersionStatus; import com.linkedin.venice.meta.ZKStore; import com.linkedin.venice.router.stats.StaleVersionStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.TestUtils; import com.linkedin.venice.utils.Utils; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import java.nio.ByteBuffer; import java.util.HashMap; @@ -75,7 +75,7 @@ public void throws404onMissingStore() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); try { versionFinder.getVersion("", request); Assert.fail( @@ -115,7 +115,7 @@ public void throws301onMigratedStore() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); try { request.headers().add(HttpConstants.VENICE_ALLOW_REDIRECT, "1"); versionFinder.getVersion("store", request); @@ -150,7 +150,7 @@ public void returnNonExistingVersionOnceStoreIsDisabled() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); try { versionFinder.getVersion(storeName, request); Assert.fail("Store should be disabled and forbidden to read."); @@ -190,7 +190,7 @@ public void testSwapsVersionWhenAllPartitionsAreOnline() { HelixReadOnlyStoreConfigRepository storeConfigRepo = 
mock(HelixReadOnlyStoreConfigRepository.class); CompressorFactory compressorFactory = mock(CompressorFactory.class); - MetricsRepository mockMetricsRepository = mock(MetricsRepository.class); + VeniceMetricsRepository mockMetricsRepository = mock(VeniceMetricsRepository.class); final Sensor mockSensor = mock(Sensor.class); doReturn(mockSensor).when(mockMetricsRepository).sensor(anyString(), any()); @@ -277,7 +277,7 @@ public void returnsCurrentVersionWhenTheDictionaryExists() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); String firstVersionKafkaTopic = Version.composeKafkaTopic(storeName, firstVersion); @@ -326,7 +326,7 @@ public void returnsCurrentVersionWhenItIsTheOnlyOption() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); String firstVersionKafkaTopic = Version.composeKafkaTopic(storeName, firstVersion); @@ -361,7 +361,7 @@ public void returnsPreviousVersionWhenDictionaryNotDownloaded() { doReturn(true).when(routingDataRepo).containsKafkaTopic(anyString()); CompressorFactory compressorFactory = mock(CompressorFactory.class); - MetricsRepository mockMetricsRepository = mock(MetricsRepository.class); + VeniceMetricsRepository mockMetricsRepository = mock(VeniceMetricsRepository.class); final Sensor mockSensor = mock(Sensor.class); doReturn(mockSensor).when(mockMetricsRepository).sensor(anyString(), any()); @@ -419,7 +419,7 @@ public void returnsNewVersionWhenDictionaryDownloads() { doReturn(3).when(routingDataRepo).getNumberOfPartitions(anyString()); doReturn(instances).when(routingDataRepo).getReadyToServeInstances(anyString(), anyInt()); doReturn(true).when(routingDataRepo).containsKafkaTopic(anyString()); - MetricsRepository mockMetricsRepository = mock(MetricsRepository.class); + VeniceMetricsRepository mockMetricsRepository = mock(VeniceMetricsRepository.class); final Sensor mockSensor = mock(Sensor.class); 
doReturn(mockSensor).when(mockMetricsRepository).sensor(anyString(), any()); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java index 3a0cbc98f0..f4100ff399 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java @@ -20,11 +20,11 @@ import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; import com.linkedin.venice.serializer.RecordSerializer; import com.linkedin.venice.serializer.SerializerDeserializerFactory; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.Utils; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -37,14 +37,15 @@ public class TestVeniceMultiGetPath { private final RetryManager disabledRetryManager = - new RetryManager(new MetricsRepository(), "disabled-test-retry-manager", 0, 0, null); + new RetryManager(new VeniceMetricsRepository(), "disabled-test-retry-manager", 0, 0, null); @BeforeClass public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + "test-cluster", requestType, mock(ReadOnlyStoreRepository.class), true))); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java index da07b2f926..eef224113d 100644 --- 
a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java @@ -10,12 +10,12 @@ import com.linkedin.venice.read.RequestType; import com.linkedin.venice.router.api.RouterKey; import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.TestMockTime; import com.linkedin.venice.utils.TestUtils; import com.linkedin.venice.utils.Time; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpResponseStatus; -import io.tehuti.metrics.MetricsRepository; import java.time.Clock; import java.util.Collection; import java.util.concurrent.ScheduledExecutorService; @@ -83,13 +83,13 @@ public String getLocation() { } private RetryManager disabledRetryManager; - private MetricsRepository metricsRepository; + private VeniceMetricsRepository metricsRepository; private final ScheduledExecutorService retryManagerScheduler = Executors.newScheduledThreadPool(1); @BeforeMethod public void setUp() { - metricsRepository = new MetricsRepository(); + metricsRepository = new VeniceMetricsRepository(); // retry manager is disabled by default disabledRetryManager = new RetryManager(metricsRepository, "disabled-test-retry-manager", 0, 0, retryManagerScheduler); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java index 57e82128ff..35364da6b1 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java @@ -3,9 +3,9 @@ import static org.mockito.Mockito.*; import com.linkedin.venice.router.VeniceRouterConfig; +import 
com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; -import io.tehuti.metrics.MetricsRepository; import org.testng.Assert; import org.testng.annotations.Test; @@ -13,7 +13,7 @@ public class AdminOperationsStatsTest { @Test public void testAdminOperationsStats() { - MetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedMetricsRepository(); + VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); MockTehutiReporter reporter = new MockTehutiReporter(); metrics.addReporter(reporter); VeniceRouterConfig mockConfig = mock(VeniceRouterConfig.class); diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java index 8305d622d6..1fe0116fc9 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java @@ -25,7 +25,7 @@ public AggServerHttpRequestStats( unregisterMetricForDeletedStoreEnabled); } - static class ServerHttpRequestStatsSupplier implements StatsSupplier { + static class ServerHttpRequestStatsSupplier implements StatsSupplierMetricsRepository { private final RequestType requestType; private final boolean isKeyValueProfilingEnabled; From 9e2fba336fead87be199098fa38c2dac41ce78dc Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Mon, 18 Nov 2024 05:30:40 -0800 Subject: [PATCH 02/19] address review comments part 1 --- build.gradle | 11 +- .../com/linkedin/venice/read/RequestType.java | 13 +- .../linkedin/venice/stats/TehutiUtils.java | 5 +- .../venice/stats/VeniceMetricsConfig.java | 351 +++++++++++------- .../venice/stats/VeniceMetricsRepository.java | 5 +- .../VeniceOpenTelemetryConfigProperties.java | 
284 -------------- .../VeniceOpenTelemetryMetricFormat.java | 34 -- ...VeniceOpenTelemetryMetricNamingFormat.java | 93 +++++ .../VeniceOpenTelemetryMetricsRepository.java | 209 +++-------- .../venice/utils/VeniceProperties.java | 2 +- .../utils/metrics/MetricsRepositoryUtils.java | 7 +- .../venice/stats/VeniceMetricsConfigTest.java | 160 +++++--- .../stats/VeniceMetricsRepositoryTest.java | 11 +- ...iceOpenTelemetryMetricsRepositoryTest.java | 54 +-- .../VeniceHttpResponseStatusCodeCategory.java | 123 ++---- .../dimensions/VeniceMetricsDimensions.java | 20 +- ...iceHttpResponseStatusCodeCategoryTest.java | 49 +-- .../VeniceMetricsDimensionsTest.java | 8 +- .../utils/VeniceRouterWrapper.java | 12 +- .../router/api/TestVeniceDispatcher.java | 20 +- .../linkedin/venice/router/RouterServer.java | 21 +- .../router/api/VeniceResponseAggregator.java | 1 + .../router/stats/RouterHttpRequestStats.java | 57 +-- .../router/AggRouterHttpRequestStatsTest.java | 3 +- .../router/RouteHttpRequestStatsTest.java | 3 +- .../router/api/TestVeniceDelegateMode.java | 45 ++- .../router/api/TestVenicePathParser.java | 21 +- .../api/path/TestVeniceMultiGetPath.java | 24 +- .../router/api/path/TestVenicePath.java | 3 +- .../stats/AdminOperationsStatsTest.java | 3 +- 30 files changed, 711 insertions(+), 941 deletions(-) delete mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java delete mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java diff --git a/build.gradle b/build.gradle index b2082801c2..5e75849b46 100644 --- a/build.gradle +++ b/build.gradle @@ -54,6 +54,7 @@ def hadoopVersion = '2.10.2' def apacheSparkVersion = '3.3.3' def antlrVersion = '4.8' def scala = '2.12' +def openTelemetryVersion = '1.43.0' ext.libraries = [ 
alpnAgent: "org.mortbay.jetty.alpn:jetty-alpn-agent:${alpnAgentVersion}", @@ -141,11 +142,11 @@ ext.libraries = [ zkclient: 'com.101tec:zkclient:0.7', // For Kafka AdminUtils zookeeper: 'org.apache.zookeeper:zookeeper:3.6.3', zstd: 'com.github.luben:zstd-jni:1.5.2-3', - opentelemetryApi: "io.opentelemetry:opentelemetry-api:1.43.0", - opentelemetrySdk: "io.opentelemetry:opentelemetry-sdk:1.43.0", - opentelemetryExporterLogging: "io.opentelemetry:opentelemetry-exporter-logging:1.43.0", - opentelemetryExporterOtlp: "io.opentelemetry:opentelemetry-exporter-otlp:1.43.0", - opentelemetryExporterCommon: "io.opentelemetry:opentelemetry-exporter-common:1.43.0" + opentelemetryApi: "io.opentelemetry:opentelemetry-api:${openTelemetryVersion}", + opentelemetrySdk: "io.opentelemetry:opentelemetry-sdk:${openTelemetryVersion}", + opentelemetryExporterLogging: "io.opentelemetry:opentelemetry-exporter-logging:${openTelemetryVersion}", + opentelemetryExporterOtlp: "io.opentelemetry:opentelemetry-exporter-otlp:${openTelemetryVersion}", + opentelemetryExporterCommon: "io.opentelemetry:opentelemetry-exporter-common:${openTelemetryVersion}" ] group = 'com.linkedin.venice' diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java index b734b017ab..b426ee1aab 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java @@ -1,23 +1,16 @@ package com.linkedin.venice.read; public enum RequestType { - SINGLE_GET("", "single_get"), MULTI_GET("multiget_", "multi_get"), - MULTI_GET_STREAMING("multiget_streaming_", "multi_get_streaming"), COMPUTE("compute_", "compute"), - COMPUTE_STREAMING("compute_streaming_", "compute_streaming"); + SINGLE_GET(""), MULTI_GET("multiget_"), MULTI_GET_STREAMING("multiget_streaming_"), COMPUTE("compute_"), + 
COMPUTE_STREAMING("compute_streaming_"); private String metricPrefix; - private String requestTypeName; - RequestType(String metricPrefix, String requestTypeName) { + RequestType(String metricPrefix) { this.metricPrefix = metricPrefix; - this.requestTypeName = requestTypeName; } public String getMetricPrefix() { return this.metricPrefix; } - - public String getRequestTypeName() { - return this.requestTypeName; - } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java index 7ef5a7bdf5..86909d3ef1 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java @@ -10,6 +10,7 @@ import io.tehuti.metrics.stats.Rate; import java.util.Arrays; import java.util.Map; +import org.apache.commons.cli.MissingArgumentException; /** @@ -133,9 +134,9 @@ public static MetricsRepository getMetricsRepository(String serviceName) { public static VeniceMetricsRepository getVeniceMetricsRepository( String serviceName, String metricPrefix, - Map configs) { + Map configs) throws MissingArgumentException { VeniceMetricsRepository metricsRepository = new VeniceMetricsRepository( - new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName(serviceName) + new VeniceMetricsConfig.Builder().setServiceName(serviceName) .setMetricPrefix(metricPrefix) .extractAndSetOtelConfigs(configs) .build()); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index 586143f276..e0a4fb2ae8 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -1,11 +1,16 @@ package com.linkedin.venice.stats; -import com.linkedin.venice.exceptions.VeniceException; +import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; +import io.opentelemetry.sdk.metrics.Aggregation; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; +import io.opentelemetry.sdk.metrics.export.DefaultAggregationSelector; import io.opentelemetry.sdk.metrics.export.MetricExporter; import io.tehuti.metrics.MetricConfig; import java.util.HashMap; import java.util.Locale; import java.util.Map; +import org.apache.commons.cli.MissingArgumentException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -14,165 +19,254 @@ public class VeniceMetricsConfig { private static final Logger LOGGER = LogManager.getLogger(VeniceMetricsConfig.class); private final String serviceName; private final String metricPrefix; - /** config to control whether to emit OpenTelemetry or tehuti metrics or both - * emitTehutiMetrics is not used for now */ - private final boolean emitOpenTelemetryMetrics; - private final boolean emitTehutiMetrics; + /** reusing tehuti's MetricConfig */ + private final MetricConfig tehutiMetricConfig; + + /** Below are the configs for OpenTelemetry metrics */ + + /** Feature flag to use OpenTelemetry instrumentation for metrics or not */ + private final boolean emitOTelMetrics; /** extra configs for OpenTelemetry. Supports 2 exporter currently
* 1. {@link MetricExporter} for exporting to Http/Grpc endpoint. More details are supported via configs, - * check {@link VeniceMetricsConfigBuilder#extractAndSetOtelConfigs} and {@link VeniceOpenTelemetryMetricsRepository#getOtlpHttpMetricExporter}
+ * check {@link Builder#extractAndSetOtelConfigs} and {@link VeniceOpenTelemetryMetricsRepository#getOtlpHttpMetricExporter}
* 2. {@link VeniceOpenTelemetryMetricsRepository.LogBasedMetricExporter} for debug purposes */ - private final Map otelConfigs; - private final boolean emitToHttpGrpcEndpoint; - private final boolean emitToLog; // for debug purposes - private final VeniceOpenTelemetryMetricFormat metricFormat; - private final boolean useExponentialHistogram; - private final int exponentialHistogramMaxScale; - private final int exponentialHistogramMaxBuckets; + private final boolean exportOtelMetricsToEndpoint; + private final boolean exportOtelMetricsToLog; - /** reusing tehuti's MetricConfig */ - private final MetricConfig tehutiMetricConfig; + /** + * protocol for OpenTelemetry exporter. supports + * 1. {@link OtlpConfigUtil#PROTOCOL_HTTP_PROTOBUF} => "http/protobuf" + * 2. {@link OtlpConfigUtil#PROTOCOL_GRPC} => "grpc" + */ + private final String otelExportProtocol; + + /** endpoint to export OpenTelemetry Metrics to */ + private final String otelEndpoint; - private VeniceMetricsConfig(VeniceMetricsConfigBuilder veniceMetricsConfigBuilder) { - this.serviceName = veniceMetricsConfigBuilder.serviceName; - this.metricPrefix = veniceMetricsConfigBuilder.metricPrefix; - this.emitOpenTelemetryMetrics = veniceMetricsConfigBuilder.emitOpenTelemetryMetrics; - this.emitTehutiMetrics = veniceMetricsConfigBuilder.emitTehutiMetrics; - this.emitToHttpGrpcEndpoint = veniceMetricsConfigBuilder.emitToHttpGrpcEndpoint; - this.emitToLog = veniceMetricsConfigBuilder.emitToLog; - this.metricFormat = veniceMetricsConfigBuilder.metricFormat; - this.useExponentialHistogram = veniceMetricsConfigBuilder.useExponentialHistogram; - this.exponentialHistogramMaxScale = veniceMetricsConfigBuilder.exponentialHistogramMaxScale; - this.exponentialHistogramMaxBuckets = veniceMetricsConfigBuilder.exponentialHistogramMaxBuckets; - this.otelConfigs = veniceMetricsConfigBuilder.otelConfigs; - this.tehutiMetricConfig = veniceMetricsConfigBuilder.tehutiMetricConfig; + /** Headers to be passed while creating 
OpenTelemetry exporter */ + private final Map otelHeaders; + + /** Metric naming conventions for OpenTelemetry metrics */ + private final VeniceOpenTelemetryMetricNamingFormat metricNamingFormat; + + /** Aggregation Temporality selector to export only the delta or cumulate or different */ + private final AggregationTemporalitySelector otelAggregationTemporalitySelector; + + /** Default histogram aggregation to be used for all histograms: Select exponential or explicit bucket histogram */ + private final DefaultAggregationSelector otelHistogramAggregationSelector; + + private VeniceMetricsConfig(Builder builder) { + this.serviceName = builder.serviceName; + this.metricPrefix = builder.metricPrefix; + this.emitOTelMetrics = builder.emitOtelMetrics; + this.exportOtelMetricsToEndpoint = builder.exportOtelMetricsToEndpoint; + this.otelExportProtocol = builder.otelExportProtocol; + this.otelEndpoint = builder.otelEndpoint; + this.otelHeaders = builder.otelHeaders; + this.exportOtelMetricsToLog = builder.exportOtelMetricsToLog; + this.metricNamingFormat = builder.metricNamingFormat; + this.otelAggregationTemporalitySelector = builder.otelAggregationTemporalitySelector; + this.otelHistogramAggregationSelector = builder.otelHistogramAggregationSelector; + this.tehutiMetricConfig = builder.tehutiMetricConfig; } - public static class VeniceMetricsConfigBuilder { - private String serviceName = "noop_service"; + public static class Builder { + private String serviceName = null; private String metricPrefix = null; - private boolean emitOpenTelemetryMetrics = false; - private boolean emitTehutiMetrics = true; - private boolean emitToHttpGrpcEndpoint = false; - private boolean emitToLog = false; - private VeniceOpenTelemetryMetricFormat metricFormat = VeniceOpenTelemetryMetricFormat.SNAKE_CASE; - private boolean useExponentialHistogram = true; - private int exponentialHistogramMaxScale = 3; - private int exponentialHistogramMaxBuckets = 250; - private Map otelConfigs = new 
HashMap<>(); + private boolean emitOtelMetrics = false; + private boolean exportOtelMetricsToEndpoint = false; + private String otelExportProtocol = OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF; + private String otelEndpoint = null; + Map otelHeaders = new HashMap<>(); + private boolean exportOtelMetricsToLog = false; + private VeniceOpenTelemetryMetricNamingFormat metricNamingFormat = VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; + private AggregationTemporalitySelector otelAggregationTemporalitySelector = + AggregationTemporalitySelector.deltaPreferred(); + DefaultAggregationSelector otelHistogramAggregationSelector = null; private MetricConfig tehutiMetricConfig = null; - public VeniceMetricsConfigBuilder setServiceName(String serviceName) { + public Builder setServiceName(String serviceName) { this.serviceName = serviceName; return this; } - public VeniceMetricsConfigBuilder setMetricPrefix(String metricPrefix) { + public Builder setMetricPrefix(String metricPrefix) { this.metricPrefix = metricPrefix; return this; } - public VeniceMetricsConfigBuilder setEmitOpenTelemetryMetrics(boolean emitOpenTelemetryMetrics) { - this.emitOpenTelemetryMetrics = emitOpenTelemetryMetrics; + public Builder setEmitOtelMetrics(boolean emitOtelMetrics) { + this.emitOtelMetrics = emitOtelMetrics; return this; } - public VeniceMetricsConfigBuilder setEmitTehutiMetrics(boolean emitTehutiMetrics) { - this.emitTehutiMetrics = emitTehutiMetrics; + public Builder setExportOtelMetricsToEndpoint(boolean exportOtelMetricsToEndpoint) { + this.exportOtelMetricsToEndpoint = exportOtelMetricsToEndpoint; return this; } - public VeniceMetricsConfigBuilder setEmitToHttpGrpcEndpoint(boolean emitToHttpGrpcEndpoint) { - this.emitToHttpGrpcEndpoint = emitToHttpGrpcEndpoint; + public Builder setOtelExportProtocol(String otelExportProtocol) { + this.otelExportProtocol = otelExportProtocol; return this; } - public VeniceMetricsConfigBuilder setEmitToLog(boolean emitToLog) { - this.emitToLog = emitToLog; + 
public Builder setOtelEndpoint(String otelEndpoint) { + this.otelEndpoint = otelEndpoint; return this; } - public VeniceMetricsConfigBuilder setMetricFormat(VeniceOpenTelemetryMetricFormat metricFormat) { - this.metricFormat = metricFormat; + public Builder setExportOtelMetricsToLog(boolean exportOtelMetricsToLog) { + this.exportOtelMetricsToLog = exportOtelMetricsToLog; return this; } - public VeniceMetricsConfigBuilder extractAndSetOtelConfigs(Map configs) { - // copy only OpenTelemetry related configs - for (Map.Entry entry: configs.entrySet()) { - if (entry.getKey().startsWith("otel.")) { - otelConfigs.put(entry.getKey(), entry.getValue()); - } - } - LOGGER.info("OpenTelemetry configs: {}", otelConfigs); + public Builder setMetricNamingFormat(VeniceOpenTelemetryMetricNamingFormat metricNamingFormat) { + this.metricNamingFormat = metricNamingFormat; return this; } - public VeniceMetricsConfigBuilder setTehutiMetricConfig(MetricConfig tehutiMetricConfig) { - this.tehutiMetricConfig = tehutiMetricConfig; + public Builder setOtelAggregationTemporalitySelector( + AggregationTemporalitySelector otelAggregationTemporalitySelector) { + this.otelAggregationTemporalitySelector = otelAggregationTemporalitySelector; return this; } - /** get the last part of the service name - * For instance: if service name is "venice-router", return "router" + public Builder setOtelHistogramAggregationSelector(DefaultAggregationSelector otelHistogramAggregationSelector) { + this.otelHistogramAggregationSelector = otelHistogramAggregationSelector; + return this; + } + + /** + * Extract and set otel configs */ - public static String getMetricsPrefix(String input) { - String[] parts = input.split("[\\-\\._]"); - String lastPart = parts[parts.length - 1]; - return lastPart; + public Builder extractAndSetOtelConfigs(Map configs) { + String configValue; + if ((configValue = configs.get("otel.venice.enabled")) != null) { + setEmitOtelMetrics(configValue.toLowerCase(Locale.ROOT).equals("true")); 
+ } + + if ((configValue = configs.get("otel.venice.export.to.log")) != null) { + setExportOtelMetricsToLog(configValue.toLowerCase(Locale.ROOT).equals("true")); + } + + if ((configValue = configs.get("otel.venice.export.to.endpoint")) != null) { + setExportOtelMetricsToEndpoint(configValue.toLowerCase(Locale.ROOT).equals("true")); + } + + if ((configValue = configs.get("otel.exporter.otlp.metrics.protocol")) != null) { + setOtelExportProtocol(configValue); + } + + if ((configValue = configs.get("otel.venice.metrics.format")) != null) { + setMetricNamingFormat(VeniceOpenTelemetryMetricNamingFormat.valueOf(configValue.toUpperCase(Locale.ROOT))); + } + + if ((configValue = configs.get("otel.exporter.otlp.metrics.endpoint")) != null) { + // validate endpoint: TODO + setOtelEndpoint(configValue); + } + + /** + * Headers are passed as key=value pairs separated by '=' + * Multiple headers are separated by ',' + * + * Currently supporting 1 header + */ + if ((configValue = configs.get("otel.exporter.otlp.metrics.headers")) != null) { + String[] headers = configValue.split("="); + otelHeaders.put(headers[0], headers[1]); + } + + if ((configValue = configs.get("otel.exporter.otlp.metrics.temporality.preference")) != null) { + switch (configValue.toLowerCase(Locale.ROOT)) { + case "cumulative": + setOtelAggregationTemporalitySelector(AggregationTemporalitySelector.alwaysCumulative()); + break; + case "delta": + setOtelAggregationTemporalitySelector(AggregationTemporalitySelector.deltaPreferred()); + break; + case "lowmemory": + setOtelAggregationTemporalitySelector(AggregationTemporalitySelector.lowMemory()); + break; + default: + throw new IllegalArgumentException("Unrecognized aggregation temporality: " + configValue); + } + } + + if ((configValue = configs.get("otel.exporter.otlp.metrics.default.histogram.aggregation")) != null) { + switch (configValue.toLowerCase(Locale.ROOT)) { + case "base2_exponential_bucket_histogram": + String maxScaleValue = 
configs.get("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale"); + String maxBucketValue = configs.get("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets"); + if (maxScaleValue != null && maxBucketValue != null) { + int maxScale = Integer.parseInt(maxScaleValue); + int maxBuckets = Integer.parseInt(maxBucketValue); + setOtelHistogramAggregationSelector( + DefaultAggregationSelector.getDefault() + .with( + InstrumentType.HISTOGRAM, + Aggregation.base2ExponentialBucketHistogram(maxBuckets, maxScale))); + } else { + setOtelHistogramAggregationSelector( + DefaultAggregationSelector.getDefault() + .with(InstrumentType.HISTOGRAM, Aggregation.base2ExponentialBucketHistogram())); + } + break; + + case "explicit_bucket_histogram": + setOtelHistogramAggregationSelector( + DefaultAggregationSelector.getDefault() + .with(InstrumentType.HISTOGRAM, Aggregation.explicitBucketHistogram())); + break; + + default: + throw new IllegalArgumentException("Unrecognized default histogram aggregation: " + configValue); + } + } + + // todo: add more configs + // "otel.exporter.otlp.metrics.compression" + // "otel.exporter.otlp.metrics.timeout" + return this; + } + + public Builder setTehutiMetricConfig(MetricConfig tehutiMetricConfig) { + this.tehutiMetricConfig = tehutiMetricConfig; + return this; } // Validate required fields before building - private void checkAndSetDefaults() { + private void checkAndSetDefaults() throws MissingArgumentException { if (tehutiMetricConfig == null) { setTehutiMetricConfig(new MetricConfig()); } - if (metricPrefix == null) { - setMetricPrefix(getMetricsPrefix(serviceName)); + + if (serviceName == null) { + throw new MissingArgumentException("serviceName is required to configure OpenTelemetry"); } - if (otelConfigs.containsKey("otel.venice.enabled")) { - String status = otelConfigs.get("otel.venice.enabled"); - if (status != null) { - setEmitOpenTelemetryMetrics(status.toLowerCase(Locale.ROOT).equals("true")); - } + + if 
(metricPrefix == null) { + LOGGER.warn("metricPrefix is not set. Defaulting to empty string"); + setMetricPrefix(""); } - // check otelConfigs and set defaults - if (emitOpenTelemetryMetrics) { - if (otelConfigs.containsKey("otel.venice.export.to.log")) { - String emitStatus = otelConfigs.get("otel.venice.export.to.log"); - if (emitStatus != null) { - setEmitToLog(emitStatus.toLowerCase(Locale.ROOT).equals("true")); - } - } - if (otelConfigs.containsKey("otel.venice.export.to.http.grpc.endpoint")) { - String emitStatus = otelConfigs.get("otel.venice.export.to.http.grpc.endpoint"); - if (emitStatus != null) { - setEmitToHttpGrpcEndpoint(emitStatus.toLowerCase(Locale.ROOT).equals("true")); - } - } - if (otelConfigs.containsKey("otel.venice.metrics.format")) { - String format = otelConfigs.get("otel.venice.metrics.format"); - if (format != null) { - try { - setMetricFormat(VeniceOpenTelemetryMetricFormat.valueOf(format.toUpperCase(Locale.ROOT))); - } catch (IllegalArgumentException e) { - LOGGER.warn("Invalid metric format: {}, setting to default: {}", format, metricFormat); - } - } - } - if (emitToHttpGrpcEndpoint) { - if (!otelConfigs.containsKey("otel.exporter.otlp.metrics.protocol") - || !otelConfigs.containsKey("otel.exporter.otlp.metrics.endpoint")) { - throw new VeniceException( - "otel settings missing for otel.exporter.otlp.metrics.protocol and otel.exporter.otlp.metrics.endpoint"); + + if (emitOtelMetrics) { + if (exportOtelMetricsToEndpoint) { + if (otelEndpoint == null) { + throw new MissingArgumentException("endpoint is required to configure OpenTelemetry metrics export"); } + + } else { + LOGGER.warn("OpenTelemetry metrics are enabled but no endpoint is configured to export metrics"); } + } else { + LOGGER.warn("OpenTelemetry metrics are disabled"); } } - public VeniceMetricsConfig build() { + public VeniceMetricsConfig build() throws MissingArgumentException { checkAndSetDefaults(); return new VeniceMetricsConfig(this); } @@ -187,36 +281,40 @@ public 
String getMetricPrefix() { return this.metricPrefix; } - public boolean isEmitOpenTelemetryMetrics() { - return emitOpenTelemetryMetrics; + public boolean emitOtelMetrics() { + return emitOTelMetrics; + } + + public boolean exportOtelMetricsToEndpoint() { + return exportOtelMetricsToEndpoint; } - public boolean isEmitToHttpGrpcEndpoint() { - return emitToHttpGrpcEndpoint; + public String getOtelExportProtocol() { + return otelExportProtocol; } - public boolean isEmitToLog() { - return emitToLog; + public String getOtelEndpoint() { + return otelEndpoint; } - public VeniceOpenTelemetryMetricFormat getMetricFormat() { - return metricFormat; + public boolean exportOtelMetricsToLog() { + return exportOtelMetricsToLog; } - public boolean isUseExponentialHistogram() { - return useExponentialHistogram; + public Map getOtelHeaders() { + return otelHeaders; } - public int getExponentialHistogramMaxScale() { - return exponentialHistogramMaxScale; + public VeniceOpenTelemetryMetricNamingFormat getMetricNamingFormat() { + return metricNamingFormat; } - public int getExponentialHistogramMaxBuckets() { - return exponentialHistogramMaxBuckets; + public AggregationTemporalitySelector getOtelAggregationTemporalitySelector() { + return otelAggregationTemporalitySelector; } - public Map getOtelConfigs() { - return otelConfigs; + public DefaultAggregationSelector getOtelHistogramAggregationSelector() { + return otelHistogramAggregationSelector; } public MetricConfig getTehutiMetricConfig() { @@ -226,10 +324,11 @@ public MetricConfig getTehutiMetricConfig() { @Override public String toString() { return "VeniceMetricsConfig{" + "serviceName='" + serviceName + '\'' + ", metricPrefix='" + metricPrefix + '\'' - + ", emitOpenTelemetryMetrics=" + emitOpenTelemetryMetrics + ", emitTehutiMetrics=" + emitTehutiMetrics - + ", otelConfigs=" + otelConfigs + ", emitToHttpGrpcEndpoint=" + emitToHttpGrpcEndpoint + ", emitToLog=" - + emitToLog + ", metricFormat=" + metricFormat + ", 
useExponentialHistogram=" + useExponentialHistogram - + ", exponentialHistogramMaxScale=" + exponentialHistogramMaxScale + ", exponentialHistogramMaxBuckets=" - + exponentialHistogramMaxBuckets + ", tehutiMetricConfig=" + tehutiMetricConfig + '}'; + + ", emitOTelMetrics=" + emitOTelMetrics + ", exportOtelMetricsToEndpoint=" + exportOtelMetricsToEndpoint + + ", otelExportProtocol='" + otelExportProtocol + '\'' + ", otelEndpoint='" + otelEndpoint + '\'' + + ", otelHeaders=" + otelHeaders + ", exportOtelMetricsToLog=" + exportOtelMetricsToLog + + ", metricNamingFormat=" + metricNamingFormat + ", otelAggregationTemporalitySelector=" + + otelAggregationTemporalitySelector + ", otelHistogramAggregationSelector=" + otelHistogramAggregationSelector + + ", tehutiMetricConfig=" + tehutiMetricConfig + '}'; } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index 18110e69be..6d2d899441 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -10,6 +10,7 @@ import java.io.Closeable; import java.util.Map; import java.util.Objects; +import org.apache.commons.cli.MissingArgumentException; /** extends MetricsRepository to keep the changes to a minimum. 
Next step would be to create a MetricsRepository inside rather than extending it */ @@ -18,9 +19,9 @@ public class VeniceMetricsRepository extends MetricsRepository implements Closea private VeniceMetricsConfig veniceMetricsConfig; VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; - public VeniceMetricsRepository() { + public VeniceMetricsRepository() throws MissingArgumentException { super(); - this.veniceMetricsConfig = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + this.veniceMetricsConfig = new VeniceMetricsConfig.Builder().build(); this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java deleted file mode 100644 index bc3e06c19d..0000000000 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java +++ /dev/null @@ -1,284 +0,0 @@ -package com.linkedin.venice.stats; - -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.joining; - -import io.opentelemetry.api.internal.ConfigUtil; -import io.opentelemetry.api.internal.StringUtils; -import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties; -import io.opentelemetry.sdk.autoconfigure.spi.ConfigurationException; -import java.time.Duration; -import java.util.AbstractMap; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.function.Function; -import java.util.stream.Collectors; -import javax.annotation.Nullable; - - -/** - * copy of {@link 
io.opentelemetry.sdk.autoconfigure.spi.internal.DefaultConfigProperties} with - * modification to {@link #getList} to support comma separated values for 1 key. - * - * In original class: comma separated values will be considered as different key values - * In new class: comma separated values will be considered as 1 key value - * - * Copied the entire class DefaultConfigProperties rather than extending it because it is final class. - */ -public class VeniceOpenTelemetryConfigProperties implements ConfigProperties { - /** - * Properties are normalized to The properties for both of these will be normalized to be all lower - * case, dashses are replaces with periods, and environment variable underscores are replaces with - * periods. - * - *

This class is internal and is hence not for public use. Its APIs are unstable and can change - * at any time. - */ - - private final Map config; - - /** - * Creates a {@link VeniceOpenTelemetryConfigProperties} by merging system properties, environment variables, - * and the {@code defaultProperties}. - * - *

Environment variables take priority over {@code defaultProperties}. System properties take - * priority over environment variables. - */ - public static VeniceOpenTelemetryConfigProperties create(Map defaultProperties) { - return new VeniceOpenTelemetryConfigProperties(System.getProperties(), System.getenv(), defaultProperties); - } - - /** - * Create a {@link VeniceOpenTelemetryConfigProperties} from the {@code properties}, ignoring system - * properties and environment variables. - */ - public static VeniceOpenTelemetryConfigProperties createFromMap(Map properties) { - return new VeniceOpenTelemetryConfigProperties(properties, Collections.emptyMap(), Collections.emptyMap()); - } - - private VeniceOpenTelemetryConfigProperties( - Map systemProperties, - Map environmentVariables, - Map defaultProperties) { - Map config = new HashMap<>(); - defaultProperties.forEach((name, value) -> config.put(ConfigUtil.normalizePropertyKey(name), value)); - environmentVariables.forEach((name, value) -> config.put(ConfigUtil.normalizeEnvironmentVariableKey(name), value)); - systemProperties - .forEach((key, value) -> config.put(ConfigUtil.normalizePropertyKey(key.toString()), value.toString())); - - this.config = config; - } - - private VeniceOpenTelemetryConfigProperties( - VeniceOpenTelemetryConfigProperties previousProperties, - Map overrides) { - // previousProperties are already normalized, they can be copied as they are - Map config = new HashMap<>(previousProperties.config); - overrides.forEach((name, value) -> config.put(ConfigUtil.normalizePropertyKey(name), value)); - - this.config = config; - } - - @Override - @Nullable - public String getString(String name) { - return config.get(ConfigUtil.normalizePropertyKey(name)); - } - - @Override - @Nullable - public Boolean getBoolean(String name) { - String value = config.get(ConfigUtil.normalizePropertyKey(name)); - if (value == null || value.isEmpty()) { - return null; - } - return Boolean.parseBoolean(value); - } - - 
@Override - @Nullable - @SuppressWarnings("UnusedException") - public Integer getInt(String name) { - String value = config.get(ConfigUtil.normalizePropertyKey(name)); - if (value == null || value.isEmpty()) { - return null; - } - try { - return Integer.parseInt(value); - } catch (NumberFormatException ex) { - throw newInvalidPropertyException(name, value, "integer"); - } - } - - @Override - @Nullable - @SuppressWarnings("UnusedException") - public Long getLong(String name) { - String value = config.get(ConfigUtil.normalizePropertyKey(name)); - if (value == null || value.isEmpty()) { - return null; - } - try { - return Long.parseLong(value); - } catch (NumberFormatException ex) { - throw newInvalidPropertyException(name, value, "long"); - } - } - - @Override - @Nullable - @SuppressWarnings("UnusedException") - public Double getDouble(String name) { - String value = config.get(ConfigUtil.normalizePropertyKey(name)); - if (value == null || value.isEmpty()) { - return null; - } - try { - return Double.parseDouble(value); - } catch (NumberFormatException ex) { - throw newInvalidPropertyException(name, value, "double"); - } - } - - @Override - @Nullable - @SuppressWarnings("UnusedException") - public Duration getDuration(String name) { - String value = config.get(ConfigUtil.normalizePropertyKey(name)); - if (value == null || value.isEmpty()) { - return null; - } - String unitString = getUnitString(value); - // TODO: Environment variables have unknown encoding. `trim()` may cut codepoints oddly - // but likely we'll fail for malformed unit string either way. - String numberString = value.substring(0, value.length() - unitString.length()); - try { - long rawNumber = Long.parseLong(numberString.trim()); - TimeUnit unit = getDurationUnit(unitString.trim()); - return Duration.ofNanos(TimeUnit.NANOSECONDS.convert(rawNumber, unit)); - } catch (NumberFormatException ex) { - throw new ConfigurationException( - "Invalid duration property " + name + "=" + value + ". 
Expected number, found: " + numberString, - ex); - } catch (ConfigurationException ex) { - throw new ConfigurationException("Invalid duration property " + name + "=" + value + ". " + ex.getMessage()); - } - } - - @Override - public List getList(String name) { - String value = config.get(ConfigUtil.normalizePropertyKey(name)); - if (value == null) { - return Collections.emptyList(); - } - // TODO this is a bit of a hack to support comma separated value for 1 key to be passed in header - // return filterBlanksAndNulls(value.split(",")); - return filterBlanksAndNulls(new String[] { value }); - } - - /** - * Returns {@link ConfigProperties#getList(String)} as a {@link Set} after validating there are no - * duplicate entries. - * - * @throws ConfigurationException if {@code name} contains duplicate entries - */ - public static Set getSet(ConfigProperties config, String name) { - List list = config.getList(ConfigUtil.normalizePropertyKey(name)); - Set set = new HashSet<>(list); - if (set.size() != list.size()) { - String duplicates = list.stream() - .collect(groupingBy(Function.identity(), Collectors.counting())) - .entrySet() - .stream() - .filter(entry -> entry.getValue() > 1) - .map(Map.Entry::getKey) - .collect(joining(",", "[", "]")); - throw new ConfigurationException(name + " contains duplicates: " + duplicates); - } - return set; - } - - @Override - public Map getMap(String name) { - return getList(ConfigUtil.normalizePropertyKey(name)).stream().map(entry -> { - String[] split = entry.split("=", 2); - if (split.length != 2 || StringUtils.isNullOrEmpty(split[0])) { - throw new ConfigurationException("Invalid map property: " + name + "=" + config.get(name)); - } - return filterBlanksAndNulls(split); - }) - // Filter entries with an empty value, i.e. 
"foo=" - .filter(splitKeyValuePairs -> splitKeyValuePairs.size() == 2) - .map( - splitKeyValuePairs -> new AbstractMap.SimpleImmutableEntry<>( - splitKeyValuePairs.get(0), - splitKeyValuePairs.get(1))) - // If duplicate keys, prioritize later ones similar to duplicate system properties on a - // Java command line. - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (first, next) -> next, LinkedHashMap::new)); - } - - /** - * Return a new {@link VeniceOpenTelemetryConfigProperties} by overriding the {@code previousProperties} with - * the {@code overrides}. - */ - public VeniceOpenTelemetryConfigProperties withOverrides(Map overrides) { - return new VeniceOpenTelemetryConfigProperties(this, overrides); - } - - private static ConfigurationException newInvalidPropertyException(String name, String value, String type) { - throw new ConfigurationException("Invalid value for property " + name + "=" + value + ". Must be a " + type + "."); - } - - private static List filterBlanksAndNulls(String[] values) { - return Arrays.stream(values).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); - } - - /** Returns the TimeUnit associated with a unit string. Defaults to milliseconds. */ - private static TimeUnit getDurationUnit(String unitString) { - switch (unitString) { - case "us": - return TimeUnit.MICROSECONDS; - case "ns": - return TimeUnit.NANOSECONDS; - case "": // Fallthrough expected - case "ms": - return TimeUnit.MILLISECONDS; - case "s": - return TimeUnit.SECONDS; - case "m": - return TimeUnit.MINUTES; - case "h": - return TimeUnit.HOURS; - case "d": - return TimeUnit.DAYS; - default: - throw new ConfigurationException("Invalid duration string, found: " + unitString); - } - } - - /** - * Fragments the 'units' portion of a config value from the 'value' portion. - * - *

E.g. "1ms" would return the string "ms". - */ - private static String getUnitString(String rawValue) { - int lastDigitIndex = rawValue.length() - 1; - while (lastDigitIndex >= 0) { - char c = rawValue.charAt(lastDigitIndex); - if (Character.isDigit(c)) { - break; - } - lastDigitIndex -= 1; - } - // Pull everything after the last digit. - return rawValue.substring(lastDigitIndex + 1); - } -} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java deleted file mode 100644 index 27bb283eba..0000000000 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.linkedin.venice.stats; - -import com.linkedin.venice.utils.VeniceEnumValue; - - -public enum VeniceOpenTelemetryMetricFormat implements VeniceEnumValue { - /** - * Default format if not configured, names are defined as per this. - * should use snake case as per https://opentelemetry.io/docs/specs/semconv/general/attribute-naming/ - * For example: http.response.status_code - */ - SNAKE_CASE(0), - /** - * Alternate format for attribute names. If configured, defined names in snake_case will be - * transformed to either one of below formats. 
- * - * camel case: For example, http.response.statusCode - * pascal case: For example, Http.Response.StatusCode - */ - CAMEL_CASE(1), PASCAL_CASE(2); - - private final int value; - - VeniceOpenTelemetryMetricFormat(int value) { - this.value = value; - } - - public static final int SIZE = values().length; - - @Override - public int getValue() { - return value; - } -} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java new file mode 100644 index 0000000000..c0878bcef9 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java @@ -0,0 +1,93 @@ +package com.linkedin.venice.stats; + +import com.linkedin.venice.utils.VeniceEnumValue; + + +public enum VeniceOpenTelemetryMetricNamingFormat implements VeniceEnumValue { + /** + * Default format if not configured, names are defined as per this. + * should use snake case as per https://opentelemetry.io/docs/specs/semconv/general/attribute-naming/ + * For example: http.response.status_code + */ + SNAKE_CASE(0), + /** + * Alternate format for attribute names. If configured, defined names in snake_case will be + * transformed to either one of below formats. 
+ * + * camel case: For example, http.response.statusCode + * pascal case: For example, Http.Response.StatusCode + */ + CAMEL_CASE(1), PASCAL_CASE(2); + + private final int value; + + VeniceOpenTelemetryMetricNamingFormat(int value) { + this.value = value; + } + + public static final int SIZE = values().length; + + @Override + public int getValue() { + return value; + } + + /** + * validate whether the metric name is a valid {@link VeniceOpenTelemetryMetricNamingFormat#SNAKE_CASE} + */ + public static void validateMetricName(String name) { + if (name == null || name.isEmpty()) { + throw new IllegalArgumentException("Metric name cannot be null or empty. Input name: " + name); + } + if (name.contains(" ")) { + throw new IllegalArgumentException("Metric name cannot contain spaces. Input name: " + name); + } + // name should not contain any capital or special characters except for underscore and dot + if (!name.matches("^[a-z0-9_.]*$")) { + throw new IllegalArgumentException( + "Metric name can only contain lowercase alphabets, numbers, underscore and dot. 
Input name: " + name); + } + } + + public static String transformMetricName(String input, VeniceOpenTelemetryMetricNamingFormat metricFormat) { + if (metricFormat == SNAKE_CASE) { + // no transformation needed as it should be defined in snake case by default + validateMetricName(input); + return input; + } + String[] words = input.split("\\."); + for (int i = 0; i < words.length; i++) { + if (!words[i].isEmpty()) { + String[] partWords = words[i].split("_"); + for (int j = 0; j < partWords.length; j++) { + if (metricFormat == PASCAL_CASE || j > 0) { + // either pascal case or camel case except for the first word + partWords[j] = capitalizeFirstLetter(partWords[j]); + } + } + StringBuilder sb = new StringBuilder(); + for (String partWord: partWords) { + sb.append(partWord); + } + words[i] = sb.toString(); + } + } + StringBuilder finalName = new StringBuilder(); + for (String word: words) { + finalName.append(word); + finalName.append("."); + } + // remove the last dot + if (finalName.length() > 0) { + finalName.deleteCharAt(finalName.length() - 1); + } + return finalName.toString(); + } + + private static String capitalizeFirstLetter(String word) { + if (word.isEmpty()) { + return word; + } + return Character.toUpperCase(word.charAt(0)) + word.substring(1); + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 8a8b26c1f3..b892ee0203 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -1,6 +1,7 @@ package com.linkedin.venice.stats; -import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.PASCAL_CASE; +import static 
com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.transformMetricName; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import static io.opentelemetry.sdk.metrics.data.AggregationTemporality.DELTA; import com.linkedin.venice.exceptions.VeniceException; @@ -11,15 +12,13 @@ import io.opentelemetry.api.metrics.LongCounter; import io.opentelemetry.api.metrics.LongCounterBuilder; import io.opentelemetry.api.metrics.Meter; -import io.opentelemetry.exporter.otlp.internal.OtlpMetricExporterProvider; +import io.opentelemetry.exporter.otlp.http.metrics.OtlpHttpMetricExporter; +import io.opentelemetry.exporter.otlp.http.metrics.OtlpHttpMetricExporterBuilder; import io.opentelemetry.sdk.OpenTelemetrySdk; import io.opentelemetry.sdk.common.CompletableResultCode; -import io.opentelemetry.sdk.metrics.Aggregation; -import io.opentelemetry.sdk.metrics.InstrumentSelector; import io.opentelemetry.sdk.metrics.InstrumentType; import io.opentelemetry.sdk.metrics.SdkMeterProvider; import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder; -import io.opentelemetry.sdk.metrics.View; import io.opentelemetry.sdk.metrics.data.AggregationTemporality; import io.opentelemetry.sdk.metrics.data.MetricData; import io.opentelemetry.sdk.metrics.export.MetricExporter; @@ -28,16 +27,16 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Map; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; public class VeniceOpenTelemetryMetricsRepository { private static final Logger LOGGER = LogManager.getLogger(VeniceOpenTelemetryMetricsRepository.class); - private OpenTelemetry openTelemetry = null; private SdkMeterProvider sdkMeterProvider = null; private boolean emitOpenTelemetryMetrics; - private VeniceOpenTelemetryMetricFormat metricFormat; + private VeniceOpenTelemetryMetricNamingFormat metricFormat; private Meter meter; private String metricPrefix; @@ -50,15 
+49,25 @@ public class VeniceOpenTelemetryMetricsRepository { private final VeniceConcurrentHashMap counterMap = new VeniceConcurrentHashMap<>(); MetricExporter getOtlpHttpMetricExporter(VeniceMetricsConfig metricsConfig) { - OtlpMetricExporterProvider otlpMetricExporterProvider = new OtlpMetricExporterProvider(); - VeniceOpenTelemetryConfigProperties config = - VeniceOpenTelemetryConfigProperties.createFromMap(metricsConfig.getOtelConfigs()); - return otlpMetricExporterProvider.createExporter(config); + OtlpHttpMetricExporterBuilder exporterBuilder = + OtlpHttpMetricExporter.builder().setEndpoint(metricsConfig.getOtelEndpoint()); + for (Map.Entry entry: metricsConfig.getOtelHeaders().entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + exporterBuilder.addHeader(key, value); + } + if (metricsConfig.getOtelAggregationTemporalitySelector() != null) { + exporterBuilder.setAggregationTemporalitySelector(metricsConfig.getOtelAggregationTemporalitySelector()); + } + if (metricsConfig.getOtelHistogramAggregationSelector() != null) { + exporterBuilder.setDefaultAggregationSelector(metricsConfig.getOtelHistogramAggregationSelector()); + } + return exporterBuilder.build(); } public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { - emitOpenTelemetryMetrics = metricsConfig.isEmitOpenTelemetryMetrics(); - metricFormat = metricsConfig.getMetricFormat(); + emitOpenTelemetryMetrics = metricsConfig.emitOtelMetrics(); + metricFormat = metricsConfig.getMetricNamingFormat(); if (!emitOpenTelemetryMetrics) { LOGGER.info("OpenTelemetry metrics are disabled"); return; @@ -67,39 +76,24 @@ public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { "OpenTelemetry initialization for {} started with config: {}", metricsConfig.getServiceName(), metricsConfig.toString()); - this.metricPrefix = transformMetricName("venice." + metricsConfig.getMetricPrefix()); + this.metricPrefix = transformMetricName("venice." 
+ metricsConfig.getMetricPrefix(), metricFormat); try { SdkMeterProviderBuilder builder = SdkMeterProvider.builder(); - if (metricsConfig.isEmitToHttpGrpcEndpoint()) { + if (metricsConfig.exportOtelMetricsToEndpoint()) { MetricExporter httpExporter = getOtlpHttpMetricExporter(metricsConfig); builder.registerMetricReader(PeriodicMetricReader.builder(httpExporter).build()); } - if (metricsConfig.isEmitToLog()) { + if (metricsConfig.exportOtelMetricsToLog()) { // internal to test: Disabled by default builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter()).build()); } - if (metricsConfig.isUseExponentialHistogram()) { - /** - * {@link io.opentelemetry.exporter.internal.ExporterBuilderUtil#configureHistogramDefaultAggregation} - * doesn't take in buckets and scale configs. so using the below for now rather than passing these as - * configs to {@link #getOtlpHttpMetricExporter} - */ - builder.registerView( - InstrumentSelector.builder().setName("*").setType(InstrumentType.HISTOGRAM).build(), - View.builder() - .setAggregation( - Aggregation.base2ExponentialBucketHistogram( - metricsConfig.getExponentialHistogramMaxBuckets(), - metricsConfig.getExponentialHistogramMaxScale())) - .build()); - } builder.setResource(Resource.empty()); sdkMeterProvider = builder.build(); - // Register MeterProvider with OpenTelemetry instance - openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build(); + // Register MeterProvider with the OpenTelemetry instance + OpenTelemetry openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build(); this.meter = openTelemetry.getMeter(getMetricPrefix()); LOGGER.info( @@ -107,150 +101,57 @@ public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { metricsConfig.getServiceName(), metricsConfig.toString()); } catch (Exception e) { - LOGGER.info( - "OpenTelemetry initialization for {} failed with config: {}", - metricsConfig.getServiceName(), - 
metricsConfig.toString(), - e); - throw new VeniceException("OpenTelemetry initialization for " + metricsConfig.getServiceName() + " failed", e); - } - } - - /** - * validate whether the metric name is a valid {@link VeniceOpenTelemetryMetricFormat#SNAKE_CASE} - */ - public static void validateMetricName(String name) { - if (name == null || name.isEmpty()) { - throw new IllegalArgumentException("Metric name cannot be null or empty. Input name: " + name); - } - if (name.contains(" ")) { - throw new IllegalArgumentException("Metric name cannot contain spaces. Input name: " + name); - } - // name should not contain any capital or special characters except for underscore and dot - if (!name.matches("^[a-z0-9_.]*$")) { - throw new IllegalArgumentException( - "Metric name can only contain lowercase alphabets, numbers, underscore and dot. Input name: " + name); + String err = "OpenTelemetry initialization for " + metricsConfig.getServiceName() + " failed with config: " + + metricsConfig.toString(); + LOGGER.error(err, e); + throw new VeniceException(err, e); } } String getFullMetricName(String metricPrefix, String name) { String fullMetricName = metricPrefix + "." + name; validateMetricName(fullMetricName); - return transformMetricName(fullMetricName); + return transformMetricName(fullMetricName, metricFormat); } private String getMetricPrefix() { return metricPrefix; } - /** - * Input should already be in {@link VeniceOpenTelemetryMetricFormat#SNAKE_CASE} as validated - * in {@link #validateMetricName}. 
- * - * If configured a different format, return the transformed format - */ - private String transformMetricName(String input) { - switch (metricFormat) { - case SNAKE_CASE: - return input; // input should be already in snake_case - case PASCAL_CASE: - case CAMEL_CASE: - return transformMetricName(input, metricFormat); - default: - throw new IllegalArgumentException("Unsupported metric format: " + metricFormat); - } - } - - public static String transformMetricName(String input, VeniceOpenTelemetryMetricFormat metricFormat) { - String[] words = input.split("\\."); - for (int i = 0; i < words.length; i++) { - if (!words[i].isEmpty()) { - String[] partWords = words[i].split("_"); - for (int j = 0; j < partWords.length; j++) { - if (metricFormat == PASCAL_CASE || j > 0) { - // either pascal case or camel case except for the first word - partWords[j] = capitalizeFirstLetter(partWords[j]); - } - } - StringBuilder sb = new StringBuilder(); - for (String partWord: partWords) { - sb.append(partWord); - } - words[i] = sb.toString(); - } - } - StringBuilder finalName = new StringBuilder(); - for (String word: words) { - finalName.append(word); - finalName.append("."); - } - // remove the last dot - if (finalName.length() > 0) { - finalName.deleteCharAt(finalName.length() - 1); - } - return finalName.toString(); - } - - private static String capitalizeFirstLetter(String word) { - if (word.isEmpty()) { - return word; - } - return Character.toUpperCase(word.charAt(0)) + word.substring(1); - } - public DoubleHistogram getHistogram(String name, String unit, String description) { - if (emitOpenTelemetryMetrics) { - String fullMetricName = getFullMetricName(getMetricPrefix(), name); - if (openTelemetry != null) { - return histogramMap.computeIfAbsent(name, key -> { - DoubleHistogramBuilder builder = - meter.histogramBuilder(fullMetricName).setUnit(unit).setDescription(description); - return builder.build(); - }); - } else { - LOGGER.error("Metric instrument creation failed for 
metric {} because OpenTelemetry is not initialized", name); - return null; - } - } else { + if (!emitOpenTelemetryMetrics) { return null; } + return histogramMap.computeIfAbsent(name, key -> { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName).setUnit(unit).setDescription(description); + return builder.build(); + }); } public DoubleHistogram getHistogramWithoutBuckets(String name, String unit, String description) { - if (emitOpenTelemetryMetrics) { - String fullMetricName = getFullMetricName(getMetricPrefix(), name); - if (openTelemetry != null) { - return histogramMap.computeIfAbsent(name, key -> { - DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) - .setExplicitBucketBoundariesAdvice(new ArrayList<>()) - .setUnit(unit) - .setDescription(description); - return builder.build(); - }); - } else { - LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); - return null; - } - } else { + if (!emitOpenTelemetryMetrics) { return null; } + return histogramMap.computeIfAbsent(name, key -> { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) + .setExplicitBucketBoundariesAdvice(new ArrayList<>()) + .setUnit(unit) + .setDescription(description); + return builder.build(); + }); } public LongCounter getCounter(String name, String unit, String description) { - if (emitOpenTelemetryMetrics) { - String fullMetricName = getFullMetricName(getMetricPrefix(), name); - if (openTelemetry != null) { - return counterMap.computeIfAbsent(name, key -> { - LongCounterBuilder builder = meter.counterBuilder(fullMetricName).setUnit(unit).setDescription(description); - return builder.build(); - }); - } else { - LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); - return null; 
- } - } else { + if (!emitOpenTelemetryMetrics) { return null; } + return counterMap.computeIfAbsent(name, key -> { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + LongCounterBuilder builder = meter.counterBuilder(fullMetricName).setUnit(unit).setDescription(description); + return builder.build(); + }); } public void close() { @@ -287,10 +188,6 @@ public SdkMeterProvider getSdkMeterProvider() { return sdkMeterProvider; } - public OpenTelemetry getOpenTelemetry() { - return openTelemetry; - } - public Meter getMeter() { return meter; } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java index 06bdb5d03c..c3f50adc90 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java @@ -459,7 +459,7 @@ public boolean isEmpty() { return this.props.isEmpty(); } - public Map getPropsMap() { + public Map getAsMap() { return props; } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java index 022e547ff7..70a51b9f1a 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java @@ -6,6 +6,7 @@ import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; import java.util.concurrent.TimeUnit; +import org.apache.commons.cli.MissingArgumentException; /** @@ -22,7 +23,7 @@ public static MetricsRepository createSingleThreadedMetricsRepository() { return 
createSingleThreadedMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } - public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() { + public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() throws MissingArgumentException { return createSingleThreadedVeniceMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } @@ -40,7 +41,7 @@ public static MetricsRepository createSingleThreadedMetricsRepository( public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository( long maxMetricsMeasurementTimeoutMs, - long initialMetricsMeasurementTimeoutMs) { + long initialMetricsMeasurementTimeoutMs) throws MissingArgumentException { MetricConfig tehutiMetricsConfig = new MetricConfig( new AsyncGauge.AsyncGaugeExecutor.Builder().setMetricMeasurementThreadCount(1) .setSlowMetricMeasurementThreadCount(1) @@ -48,6 +49,6 @@ public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepositor .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) .build()); return new VeniceMetricsRepository( - new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setTehutiMetricConfig(tehutiMetricsConfig).build()); + new VeniceMetricsConfig.Builder().setTehutiMetricConfig(tehutiMetricsConfig).build()); } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java index 1c1bd06b2b..bd1232db85 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -2,40 +2,52 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertThrows; +import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; 
-import com.linkedin.venice.exceptions.VeniceException; -import com.linkedin.venice.stats.VeniceMetricsConfig.VeniceMetricsConfigBuilder; +import com.linkedin.venice.stats.VeniceMetricsConfig.Builder; +import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; import io.tehuti.metrics.MetricConfig; import java.util.HashMap; import java.util.Map; +import org.apache.commons.cli.MissingArgumentException; import org.testng.annotations.Test; public class VeniceMetricsConfigTest { + @Test(expectedExceptions = MissingArgumentException.class) + public void testDefaultValuesThrowsException() throws MissingArgumentException { + new Builder().build(); + } + @Test - public void testDefaultValues() { - VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().build(); + public void testDefaultValuesWithBasicConfig() throws MissingArgumentException { + VeniceMetricsConfig config = new Builder().setServiceName("noop_service").setMetricPrefix("service").build(); assertEquals(config.getServiceName(), "noop_service"); assertEquals(config.getMetricPrefix(), "service"); - assertFalse(config.isEmitOpenTelemetryMetrics()); - assertFalse(config.isEmitToHttpGrpcEndpoint()); - assertFalse(config.isEmitToLog()); - assertTrue(config.isUseExponentialHistogram()); - assertEquals(config.getExponentialHistogramMaxScale(), 3); - assertEquals(config.getExponentialHistogramMaxBuckets(), 250); + assertFalse(config.emitOtelMetrics()); + assertFalse(config.exportOtelMetricsToEndpoint()); + assertEquals(config.getOtelExportProtocol(), OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + assertEquals(config.getOtelEndpoint(), null); + assertTrue(config.getOtelHeaders().isEmpty()); + assertFalse(config.exportOtelMetricsToLog()); + assertEquals(config.getMetricNamingFormat(), VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE); + 
assertEquals(config.getOtelAggregationTemporalitySelector(), AggregationTemporalitySelector.deltaPreferred()); + assertEquals(config.getOtelHistogramAggregationSelector(), null); + assertNotNull(config.getTehutiMetricConfig()); } @Test - public void testCustomValues() { + public void testCustomValues() throws MissingArgumentException { Map otelConfigs = new HashMap<>(); otelConfigs.put("otel.venice.enabled", "true"); otelConfigs.put("otel.venice.export.to.log", "true"); MetricConfig metricConfig = new MetricConfig(); - VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().setServiceName("TestService") + VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") .setTehutiMetricConfig(metricConfig) .extractAndSetOtelConfigs(otelConfigs) @@ -43,65 +55,129 @@ public void testCustomValues() { assertEquals(config.getServiceName(), "TestService"); assertEquals(config.getMetricPrefix(), "TestPrefix"); - assertTrue(config.isEmitOpenTelemetryMetrics()); - assertTrue(config.getOtelConfigs().containsKey("otel.venice.enabled")); - assertTrue(config.isEmitToLog()); + assertTrue(config.emitOtelMetrics()); + assertTrue(config.exportOtelMetricsToLog()); assertEquals(config.getTehutiMetricConfig(), metricConfig); } - @Test - public void testOtelMissingConfigs() { + @Test(expectedExceptions = MissingArgumentException.class) + public void testOtelMissingConfigs() throws MissingArgumentException { Map invalidOtelConfigs = new HashMap<>(); invalidOtelConfigs.put("otel.venice.enabled", "true"); - invalidOtelConfigs.put("otel.venice.export.to.http.grpc.endpoint", "true"); + invalidOtelConfigs.put("otel.venice.export.to.endpoint", "true"); - VeniceMetricsConfigBuilder builder = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(invalidOtelConfigs); + new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(invalidOtelConfigs) + .build(); + } - // should throw exception because 
required configs are missing - assertThrows(VeniceException.class, builder::build); + @Test(expectedExceptions = IllegalArgumentException.class) + public void testOtelConfigWithInvalidMetricFormat() throws MissingArgumentException { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.metrics.format", "INVALID_FORMAT"); + + new Builder().extractAndSetOtelConfigs(otelConfigs).build(); } @Test - public void testGetMetricsPrefix() { - assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("venice-router"), "router"); - assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("service_name"), "name"); - assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("com.linkedin.service"), "service"); + public void testOtelConfigWithValidMetricFormat() throws MissingArgumentException { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.metrics.format", "CAMEL_CASE"); + + VeniceMetricsConfig config = new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); + + assertEquals(config.getMetricNamingFormat(), VeniceOpenTelemetryMetricNamingFormat.CAMEL_CASE); } @Test - public void testOtelConfigWithInvalidMetricFormat() { + public void testEnableHttpGrpcEndpointConfigWithRequiredFields() throws MissingArgumentException { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.metrics.format", "INVALID_FORMAT"); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); - VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + VeniceMetricsConfig config = new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + 
.extractAndSetOtelConfigs(otelConfigs) + .build(); - assertEquals( - config.getMetricFormat(), - VeniceOpenTelemetryMetricFormat.SNAKE_CASE, - "Invalid metric format should fall back to default."); + assertTrue(config.exportOtelMetricsToEndpoint()); + assertEquals(config.getOtelExportProtocol(), OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + assertEquals(config.getOtelEndpoint(), "http://localhost"); } @Test - public void testOtelConfigWithValidMetricFormat() { + public void testSetAggregationTemporalitySelector() throws MissingArgumentException { Map otelConfigs = new HashMap<>(); otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.metrics.format", "CAMEL_CASE"); + otelConfigs.put("otel.venice.export.to.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + otelConfigs.put("otel.exporter.otlp.metrics.temporality.preference", "delta"); + + VeniceMetricsConfig config = new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); + assertEquals(config.getOtelAggregationTemporalitySelector(), AggregationTemporalitySelector.deltaPreferred()); + } - VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSetAggregationTemporalitySelectorInvalidConfig() throws MissingArgumentException { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + otelConfigs.put("otel.exporter.otlp.metrics.temporality.preference", "invalid"); - 
assertEquals(config.getMetricFormat(), VeniceOpenTelemetryMetricFormat.CAMEL_CASE); + VeniceMetricsConfig config = new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); + assertEquals(config.getOtelAggregationTemporalitySelector(), AggregationTemporalitySelector.deltaPreferred()); } @Test - public void testEnableHttpGrpcEndpointConfigWithRequiredFields() { + public void testSetHistogramAggregationSelector() throws MissingArgumentException { Map otelConfigs = new HashMap<>(); otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.http.grpc.endpoint", "true"); - otelConfigs.put("otel.exporter.otlp.metrics.protocol", "http/protobuf"); + otelConfigs.put("otel.venice.export.to.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation", "base2_exponential_bucket_histogram"); + otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale", "10"); + otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets", "50"); - VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + VeniceMetricsConfig config = new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); + assertEquals( + config.getOtelHistogramAggregationSelector().getDefaultAggregation(InstrumentType.HISTOGRAM).toString(), + "Base2ExponentialHistogramAggregation{maxBuckets=50,maxScale=10}"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSetHistogramAggregationSelectorInvalidConfig() throws MissingArgumentException { + Map otelConfigs = new HashMap<>(); + 
otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation", "invalid"); + otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale", "10"); + otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets", "50"); - assertTrue(config.isEmitToHttpGrpcEndpoint()); + new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java index d2de313da6..940ee20b0e 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java @@ -9,6 +9,7 @@ import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import java.util.Map; +import org.apache.commons.cli.MissingArgumentException; import org.mockito.Mockito; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -21,8 +22,9 @@ public class VeniceMetricsRepositoryTest { private MetricsRepository mockDelegate; @BeforeMethod - public void setUp() { - VeniceMetricsConfig config = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + public void setUp() throws MissingArgumentException { + VeniceMetricsConfig config = + new VeniceMetricsConfig.Builder().setServiceName("test").setMetricPrefix("test").build(); mockOpenTelemetryMetricsRepository = 
Mockito.mock(VeniceOpenTelemetryMetricsRepository.class); mockDelegate = Mockito.mock(MetricsRepository.class); metricsRepository = new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); @@ -34,8 +36,9 @@ public void tearDown() { } @Test - public void testConstructorWithDelegateAndConfig() { - VeniceMetricsConfig config = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + public void testConstructorWithDelegateAndConfig() throws MissingArgumentException { + VeniceMetricsConfig config = + new VeniceMetricsConfig.Builder().setServiceName("test").setMetricPrefix("test").build(); VeniceMetricsRepository repo = new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java index 85b9d109cc..b6cb77b13b 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -1,18 +1,17 @@ package com.linkedin.venice.stats; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.transformMetricName; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertNull; import static org.testng.Assert.assertSame; -import static org.testng.Assert.fail; import io.opentelemetry.api.metrics.DoubleHistogram; import io.opentelemetry.api.metrics.LongCounter; import io.opentelemetry.sdk.metrics.export.MetricExporter; import java.util.HashMap; -import org.mockito.Mock; import org.mockito.Mockito; -import 
org.mockito.MockitoAnnotations; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -21,19 +20,18 @@ public class VeniceOpenTelemetryMetricsRepositoryTest { private VeniceOpenTelemetryMetricsRepository metricsRepository; - @Mock private VeniceMetricsConfig mockMetricsConfig; @BeforeMethod public void setUp() { - MockitoAnnotations.initMocks(this); - - Mockito.when(mockMetricsConfig.isEmitOpenTelemetryMetrics()).thenReturn(true); - Mockito.when(mockMetricsConfig.getMetricFormat()).thenReturn(VeniceOpenTelemetryMetricFormat.SNAKE_CASE); + mockMetricsConfig = Mockito.mock(VeniceMetricsConfig.class); + Mockito.when(mockMetricsConfig.emitOtelMetrics()).thenReturn(true); + Mockito.when(mockMetricsConfig.getMetricNamingFormat()) + .thenReturn(VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE); Mockito.when(mockMetricsConfig.getMetricPrefix()).thenReturn("test_prefix"); Mockito.when(mockMetricsConfig.getServiceName()).thenReturn("test_service"); - Mockito.when(mockMetricsConfig.isEmitToHttpGrpcEndpoint()).thenReturn(true); - Mockito.when(mockMetricsConfig.isUseExponentialHistogram()).thenReturn(false); + Mockito.when(mockMetricsConfig.exportOtelMetricsToEndpoint()).thenReturn(true); + Mockito.when(mockMetricsConfig.getOtelEndpoint()).thenReturn("http://localhost:4318"); metricsRepository = new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig); } @@ -46,19 +44,17 @@ public void tearDown() { @Test public void testConstructorInitialize() { // Check if OpenTelemetry and SdkMeterProvider are initialized correctly - assertNotNull(metricsRepository.getOpenTelemetry()); assertNotNull(metricsRepository.getSdkMeterProvider()); assertNotNull(metricsRepository.getMeter()); } @Test public void testConstructorWithEmitDisabled() { - Mockito.when(mockMetricsConfig.isEmitOpenTelemetryMetrics()).thenReturn(false); + Mockito.when(mockMetricsConfig.emitOtelMetrics()).thenReturn(false); 
VeniceOpenTelemetryMetricsRepository metricsRepository = new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig); // Verify that metrics-related fields are null when metrics are disabled - assertNull(metricsRepository.getOpenTelemetry()); assertNull(metricsRepository.getSdkMeterProvider()); assertNull(metricsRepository.getMeter()); assertNull(metricsRepository.getHistogram("test", "unit", "desc")); @@ -69,55 +65,39 @@ public void testConstructorWithEmitDisabled() { public void testGetOtlpHttpMetricExporterWithValidConfig() { HashMap otelConfigs = new HashMap<>(); otelConfigs.put("otel.exporter.otlp.endpoint", "http://localhost:4318"); - Mockito.when(mockMetricsConfig.getOtelConfigs()).thenReturn(otelConfigs); MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); // Verify that the exporter is not null and is of the expected type assertNotNull(exporter); - - // Check that the exporter uses the correct endpoint - assertEquals(otelConfigs.get("otel.exporter.otlp.endpoint"), "http://localhost:4318"); - } - - @Test - public void testGetOtlpHttpMetricExporterWithEmptyConfig() { - Mockito.when(mockMetricsConfig.getOtelConfigs()).thenReturn(new HashMap<>()); - - try { - MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); - assertNotNull(exporter, "Exporter should be created even with an empty config."); - } catch (Exception e) { - fail("Exporter creation should not throw an exception with empty config."); - } } @Test(expectedExceptions = IllegalArgumentException.class) public void testValidateMetricNameWithNullName() { - VeniceOpenTelemetryMetricsRepository.validateMetricName(null); + validateMetricName(null); } @Test(expectedExceptions = IllegalArgumentException.class) public void testValidateMetricNameWithEmptyName() { - VeniceOpenTelemetryMetricsRepository.validateMetricName(""); + validateMetricName(""); } @Test(expectedExceptions = IllegalArgumentException.class) public void 
testValidateMetricNameWithInvalidName() { - VeniceOpenTelemetryMetricsRepository.validateMetricName("Invalid Name!"); + validateMetricName("Invalid Name!"); } @Test public void testTransformMetricName() { - Mockito.when(mockMetricsConfig.getMetricFormat()).thenReturn(VeniceOpenTelemetryMetricFormat.SNAKE_CASE); + Mockito.when(mockMetricsConfig.getMetricNamingFormat()) + .thenReturn(VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE); assertEquals(metricsRepository.getFullMetricName("prefix", "metric_name"), "prefix.metric_name"); - String transformedName = VeniceOpenTelemetryMetricsRepository - .transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricFormat.PASCAL_CASE); + String transformedName = + transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricNamingFormat.PASCAL_CASE); assertEquals(transformedName, "Test.TestMetricName"); - transformedName = VeniceOpenTelemetryMetricsRepository - .transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricFormat.CAMEL_CASE); + transformedName = transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricNamingFormat.CAMEL_CASE); assertEquals(transformedName, "test.testMetricName"); } diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java index dce14cc69e..add7f23427 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java @@ -1,108 +1,35 @@ package com.linkedin.venice.stats.dimensions; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.netty.handler.codec.http.HttpStatusClass; + + /** - * Copied {@link io.netty.handler.codec.http.HttpStatusClass} and modified it to have 1xx, 2xx, 
etc. as categories + * Maps the provided HTTP response status {@link HttpResponseStatus} to one of + * 1xx, 2xx, 3xx, 4xx, 5xx categories. */ -public enum VeniceHttpResponseStatusCodeCategory { - INFORMATIONAL(100, 200, "1xx"), - /** - * The success class (2xx) - */ - SUCCESS(200, 300, "2xx"), - /** - * The redirection class (3xx) - */ - REDIRECTION(300, 400, "3xx"), - /** - * The client error class (4xx) - */ - CLIENT_ERROR(400, 500, "4xx"), - /** - * The server error class (5xx) - */ - SERVER_ERROR(500, 600, "5xx"), - /** - * The unknown class - */ - UNKNOWN(0, 0, "Unknown") { - @Override - public boolean contains(int code) { - return code < 100 || code >= 600; - } - }; +public class VeniceHttpResponseStatusCodeCategory { + private static final String UNKNOWN_CATEGORY = "unknown"; - /** - * Returns the class of the specified HTTP status code. - */ - public static VeniceHttpResponseStatusCodeCategory valueOf(int code) { - if (INFORMATIONAL.contains(code)) { - return INFORMATIONAL; - } - if (SUCCESS.contains(code)) { - return SUCCESS; - } - if (REDIRECTION.contains(code)) { - return REDIRECTION; - } - if (CLIENT_ERROR.contains(code)) { - return CLIENT_ERROR; + public static String getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus statusCode) { + if (statusCode == null) { + return UNKNOWN_CATEGORY; } - if (SERVER_ERROR.contains(code)) { - return SERVER_ERROR; - } - return UNKNOWN; - } - /** - * Returns the class of the specified HTTP status code. - * @param code Just the numeric portion of the http status code. - */ - public static VeniceHttpResponseStatusCodeCategory valueOf(CharSequence code) { - if (code != null && code.length() == 3) { - char c0 = code.charAt(0); - return isDigit(c0) && isDigit(code.charAt(1)) && isDigit(code.charAt(2)) ? 
valueOf(digit(c0) * 100) : UNKNOWN; + HttpStatusClass statusClass = statusCode.codeClass(); + switch (statusClass) { + case INFORMATIONAL: + return "1xx"; + case SUCCESS: + return "2xx"; + case REDIRECTION: + return "3xx"; + case CLIENT_ERROR: + return "4xx"; + case SERVER_ERROR: + return "5xx"; + default: + return UNKNOWN_CATEGORY; } - return UNKNOWN; - } - - private static int digit(char c) { - return c - '0'; - } - - private static boolean isDigit(char c) { - return c >= '0' && c <= '9'; - } - - private final int min; - private final int max; - private final String category; - - VeniceHttpResponseStatusCodeCategory(int min, int max, String category) { - this.min = min; - this.max = max; - this.category = category; - } - - /** - * Returns {@code true} if and only if the specified HTTP status code falls into this class. - */ - public boolean contains(int code) { - return code >= min && code < max; - } - - /** - * Returns the category of this HTTP status class. - */ - public String getCategory() { - return category; - } - - // used for tests - public int getMin() { - return min; - } - - public int getMax() { - return max; } } diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java index 52cb21f610..f588f61977 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java @@ -1,24 +1,24 @@ package com.linkedin.venice.stats.dimensions; -import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.CAMEL_CASE; -import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.PASCAL_CASE; -import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.SNAKE_CASE; -import static 
com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository.transformMetricName; -import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository.validateMetricName; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.CAMEL_CASE; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.PASCAL_CASE; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.transformMetricName; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; -import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat; public enum VeniceMetricsDimensions { VENICE_STORE_NAME("venice.store.name"), VENICE_CLUSTER_NAME("venice.cluster.name"), - /** {@link com.linkedin.venice.read.RequestType#requestTypeName} */ + /** {@link com.linkedin.venice.read.RequestType} */ VENICE_REQUEST_METHOD("venice.request.method"), /** {@link io.netty.handler.codec.http.HttpResponseStatus} ie. 200, 400, etc */ HTTP_RESPONSE_STATUS_CODE("http.response.status_code"), - /** {@link VeniceHttpResponseStatusCodeCategory#category} ie. 1xx, 2xx, etc */ + /** {@link VeniceHttpResponseStatusCodeCategory} ie. 
1xx, 2xx, etc */ HTTP_RESPONSE_STATUS_CODE_CATEGORY("http.response.status_code_category"), /** {@link VeniceRequestValidationOutcome#outcome} */ @@ -33,7 +33,7 @@ public enum VeniceMetricsDimensions { /** {@link VeniceRequestRetryAbortReason} */ VENICE_REQUEST_RETRY_ABORT_REASON("venice.request.retry_abort_reason"); - private final String[] dimensionName = new String[VeniceOpenTelemetryMetricFormat.SIZE]; + private final String[] dimensionName = new String[VeniceOpenTelemetryMetricNamingFormat.SIZE]; VeniceMetricsDimensions(String dimensionName) { validateMetricName(dimensionName); @@ -42,7 +42,7 @@ public enum VeniceMetricsDimensions { this.dimensionName[PASCAL_CASE.getValue()] = transformMetricName(dimensionName, PASCAL_CASE); } - public String getDimensionName(VeniceOpenTelemetryMetricFormat format) { + public String getDimensionName(VeniceOpenTelemetryMetricNamingFormat format) { return dimensionName[format.getValue()]; } } diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java index 2ba9e55dba..6cc28e4ebe 100644 --- a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java @@ -1,56 +1,25 @@ package com.linkedin.venice.stats.dimensions; +import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertNotEquals; +import io.netty.handler.codec.http.HttpResponseStatus; import org.testng.annotations.Test; public class VeniceHttpResponseStatusCodeCategoryTest { @Test() public void testValues() { - for 
(VeniceHttpResponseStatusCodeCategory category: VeniceHttpResponseStatusCodeCategory.values()) { - switch (category) { - case INFORMATIONAL: - assertEquals(category.getCategory(), "1xx"); - assertEquals(category.getMin(), 100); - assertEquals(category.getMax(), 200); - break; - case SUCCESS: - assertEquals(category.getCategory(), "2xx"); - assertEquals(category.getMin(), 200); - assertEquals(category.getMax(), 300); - break; - case REDIRECTION: - assertEquals(category.getCategory(), "3xx"); - assertEquals(category.getMin(), 300); - assertEquals(category.getMax(), 400); - break; - case CLIENT_ERROR: - assertEquals(category.getCategory(), "4xx"); - assertEquals(category.getMin(), 400); - assertEquals(category.getMax(), 500); - break; - case SERVER_ERROR: - assertEquals(category.getCategory(), "5xx"); - assertEquals(category.getMin(), 500); - assertEquals(category.getMax(), 600); - break; - case UNKNOWN: - assertEquals(category.getCategory(), "Unknown"); - assertEquals(category.getMin(), 0); - assertEquals(category.getMax(), 0); - break; - default: - throw new IllegalArgumentException("Unknown category: " + category); - } - } + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.PROCESSING), "1xx"); + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.OK), "2xx"); + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.MOVED_PERMANENTLY), "3xx"); + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.BAD_REQUEST), "4xx"); + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.INTERNAL_SERVER_ERROR), "5xx"); } @Test public void testUnknownCategory() { - assertEquals(VeniceHttpResponseStatusCodeCategory.valueOf(99), VeniceHttpResponseStatusCodeCategory.UNKNOWN); - assertNotEquals(VeniceHttpResponseStatusCodeCategory.valueOf(100), VeniceHttpResponseStatusCodeCategory.UNKNOWN); - assertEquals(VeniceHttpResponseStatusCodeCategory.valueOf(600), 
VeniceHttpResponseStatusCodeCategory.UNKNOWN); + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.valueOf(99)), "unknown"); + assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.valueOf(600)), "unknown"); } } diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java index 1e459f40ed..b7442d60b6 100644 --- a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java @@ -2,14 +2,14 @@ import static org.testng.Assert.assertEquals; -import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat; import org.testng.annotations.Test; public class VeniceMetricsDimensionsTest { @Test public void testGetDimensionNameInSnakeCase() { - VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.SNAKE_CASE; + VeniceOpenTelemetryMetricNamingFormat format = VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { switch (dimension) { case VENICE_STORE_NAME: @@ -47,7 +47,7 @@ public void testGetDimensionNameInSnakeCase() { @Test public void testGetDimensionNameInCamelCase() { - VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.CAMEL_CASE; + VeniceOpenTelemetryMetricNamingFormat format = VeniceOpenTelemetryMetricNamingFormat.CAMEL_CASE; for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { switch (dimension) { case VENICE_STORE_NAME: @@ -85,7 +85,7 @@ public void testGetDimensionNameInCamelCase() { @Test public void testGetDimensionNameInPascalCase() { - VeniceOpenTelemetryMetricFormat format = 
VeniceOpenTelemetryMetricFormat.PASCAL_CASE; + VeniceOpenTelemetryMetricNamingFormat format = VeniceOpenTelemetryMetricNamingFormat.PASCAL_CASE; for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { switch (dimension) { case VENICE_STORE_NAME: diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index b5092d71fa..39f4cc85dc 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -48,6 +48,7 @@ import java.util.Optional; import java.util.Properties; import java.util.concurrent.TimeUnit; +import org.apache.commons.cli.MissingArgumentException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -155,10 +156,13 @@ static StatefulServiceProvider generateService( .put(ROUTER_STORAGE_NODE_CLIENT_TYPE, StorageNodeClientType.APACHE_HTTP_ASYNC_CLIENT.name()) .put("otel.venice.enabled", Boolean.TRUE.toString()) .put("otel.venice.export.to.log", Boolean.TRUE.toString()) - .put("otel.venice.export.to.http.grpc.endpoint", Boolean.TRUE.toString()) + .put("otel.venice.export.to.endpoint", Boolean.TRUE.toString()) .put("otel.exporter.otlp.metrics.protocol", "http/protobuf") .put("otel.exporter.otlp.metrics.endpoint", "http://localhost:4318/v1/metrics") .put("otel.exporter.otlp.metrics.temporality.preference", "delta") + .put("otel.exporter.otlp.metrics.default.histogram.aggregation", "base2_exponential_bucket_histogram") + .put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale", 3) + .put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets", 250) .put(properties); // setup d2 config first @@ -185,7 +189,7 
@@ static StatefulServiceProvider generateService( TehutiUtils.getVeniceMetricsRepository( ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, - routerProperties.getPropsMap()), + routerProperties.getAsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); return new VeniceRouterWrapper( @@ -234,7 +238,7 @@ protected void internalStop() throws Exception { } @Override - protected void newProcess() { + protected void newProcess() throws MissingArgumentException { String httpURI = "http://" + getHost() + ":" + getPort(); String httpsURI = "https://" + getHost() + ":" + getSslPort(); @@ -248,7 +252,7 @@ protected void newProcess() { Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), TehutiUtils - .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getPropsMap()), + .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getAsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); LOGGER.info("Started VeniceRouterWrapper: {}", this); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java index 1349a56226..86a71586d3 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java @@ -56,6 +56,7 @@ import java.util.Optional; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; +import org.apache.commons.cli.MissingArgumentException; import org.apache.http.client.methods.HttpGet; import org.testng.Assert; import org.testng.annotations.Test; @@ -64,7 +65,7 @@ //TODO: refactor Dispatcher to take a HttpClient Factory, so we don't need to spin up an HTTP 
server for these tests public class TestVeniceDispatcher { @Test - public void testErrorRetry() { + public void testErrorRetry() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -93,7 +94,7 @@ public void testErrorRetry() { } @Test - public void testErrorRetryOnPendingCheckFail() { + public void testErrorRetryOnPendingCheckFail() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(true, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -123,7 +124,7 @@ public void testErrorRetryOnPendingCheckFail() { } @Test - public void testErrorRetryOnPendingCheckLeak() { + public void testErrorRetryOnPendingCheckLeak() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(false, true); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -153,7 +154,7 @@ public void testErrorRetryOnPendingCheckLeak() { } @Test - public void passesThroughHttp429() { + public void passesThroughHttp429() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -182,7 +183,7 @@ public void passesThroughHttp429() { } @Test - public void passThroughCompressedDataIfClientSupportsDecompressionForSingleGet() { + public void passThroughCompressedDataIfClientSupportsDecompressionForSingleGet() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -215,7 +216,7 @@ public void passThroughCompressedDataIfClientSupportsDecompressionForSingleGet() } @Test - public void decompressRecordIfClientDoesntSupportsDecompressionForSingleGet() { + public void decompressRecordIfClientDoesntSupportsDecompressionForSingleGet() throws MissingArgumentException { VeniceDispatcher dispatcher 
= getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -247,7 +248,7 @@ public void decompressRecordIfClientDoesntSupportsDecompressionForSingleGet() { } @Test - public void passThroughCompressedDataIfClientSupportsDecompressionForMultiGet() { + public void passThroughCompressedDataIfClientSupportsDecompressionForMultiGet() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -279,7 +280,7 @@ public void passThroughCompressedDataIfClientSupportsDecompressionForMultiGet() } @Test - public void decompressRecordIfClientDoesntSupportsDecompressionForMultiGet() { + public void decompressRecordIfClientDoesntSupportsDecompressionForMultiGet() throws MissingArgumentException { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -310,7 +311,8 @@ public void decompressRecordIfClientDoesntSupportsDecompressionForMultiGet() { } } - private VeniceDispatcher getMockDispatcher(boolean forcePendingCheck, boolean forceLeakPending) { + private VeniceDispatcher getMockDispatcher(boolean forcePendingCheck, boolean forceLeakPending) + throws MissingArgumentException { VeniceRouterConfig routerConfig = mock(VeniceRouterConfig.class); doReturn(2).when(routerConfig).getHttpClientPoolSize(); doReturn(10).when(routerConfig).getMaxOutgoingConn(); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index eea6ce6d35..15e9d33bbe 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -124,6 +124,7 @@ import java.util.function.Consumer; import java.util.function.LongSupplier; import 
javax.annotation.Nonnull; +import org.apache.commons.cli.MissingArgumentException; import org.apache.helix.InstanceType; import org.apache.helix.manager.zk.ZKHelixManager; import org.apache.helix.zookeeper.impl.client.ZkClient; @@ -195,8 +196,8 @@ public class RouterServer extends AbstractVeniceService { // A map of optional ChannelHandlers that retains insertion order to be added at the end of the router pipeline private final Map optionalChannelHandlers = new LinkedHashMap<>(); - private static final String ROUTER_SERVICE_NAME = "venice-router"; - private static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; + public static final String ROUTER_SERVICE_NAME = "venice-router"; + public static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; /** * Thread number used to monitor the listening port; @@ -269,14 +270,14 @@ public RouterServer( VeniceProperties properties, List serviceDiscoveryAnnouncers, Optional accessController, - Optional sslFactory) { + Optional sslFactory) throws MissingArgumentException { this( properties, serviceDiscoveryAnnouncers, accessController, sslFactory, TehutiUtils - .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getPropsMap()), + .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getAsMap()), null, "venice-discovery"); } @@ -309,7 +310,7 @@ public RouterServer( Optional sslFactory, MetricsRepository metricsRepository, D2Client d2Client, - String d2ServiceName) { + String d2ServiceName) throws MissingArgumentException { this( properties, serviceDiscoveryAnnouncers, @@ -317,8 +318,8 @@ public RouterServer( sslFactory, new VeniceMetricsRepository( metricsRepository, - new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName("venice-router") - .extractAndSetOtelConfigs(properties.getPropsMap()) + new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) + .extractAndSetOtelConfigs(properties.getAsMap()) .build()), d2Client, 
d2ServiceName); @@ -435,15 +436,15 @@ public RouterServer( HelixReadOnlyStoreConfigRepository storeConfigRepository, List serviceDiscoveryAnnouncers, Optional sslFactory, - HelixLiveInstanceMonitor liveInstanceMonitor) { + HelixLiveInstanceMonitor liveInstanceMonitor) throws MissingArgumentException { this( properties, serviceDiscoveryAnnouncers, Optional.empty(), sslFactory, new VeniceMetricsRepository( - new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName("venice-router") - .extractAndSetOtelConfigs(properties.getPropsMap()) + new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) + .extractAndSetOtelConfigs(properties.getAsMap()) .build()), false); this.routingDataRepository = routingDataRepository; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java index 1275ff4c01..338d4b63b4 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java @@ -243,6 +243,7 @@ public FullHttpResponse buildResponse( // TODO: When a batch get throws a quota exception, the ROUTER_SERVER_TIME is missing, so we can't record anything // here... 
double latency = LatencyUtils.convertNSToMS(timeValue.getRawValue(TimeUnit.NANOSECONDS)); + stats.recordLatency(storeName, latency); if (HEALTHY_STATUSES.contains(httpResponseStatus)) { routerStats.getStatsByType(RequestType.SINGLE_GET) .recordReadQuotaUsage(storeName, venicePath.getPartitionKeys().size()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 9927ecbbd4..d5753de7f2 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -1,6 +1,9 @@ package com.linkedin.venice.router.stats; +import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_METRIC_PREFIX; +import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_NAME; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; +import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; @@ -19,8 +22,7 @@ import com.linkedin.venice.stats.TehutiUtils; import com.linkedin.venice.stats.VeniceMetricsConfig; import com.linkedin.venice.stats.VeniceMetricsRepository; -import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; -import com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; import 
com.linkedin.venice.stats.dimensions.VeniceRequestRetryAbortReason; import com.linkedin.venice.stats.dimensions.VeniceRequestRetryType; @@ -44,12 +46,25 @@ import io.tehuti.metrics.stats.Total; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.commons.cli.MissingArgumentException; public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private static final MetricConfig METRIC_CONFIG = new MetricConfig().timeWindow(10, TimeUnit.SECONDS); - private static final VeniceMetricsRepository localMetricRepo = new VeniceMetricsRepository( - new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setTehutiMetricConfig(METRIC_CONFIG).build()); + private static final VeniceMetricsRepository localMetricRepo; + + static { + try { + localMetricRepo = new VeniceMetricsRepository( + new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) + .setMetricPrefix(ROUTER_SERVICE_METRIC_PREFIX) + .setTehutiMetricConfig(METRIC_CONFIG) + .build()); + } catch (MissingArgumentException e) { + throw new RuntimeException(e); + } + } + private final static Sensor totalInflightRequestSensor = localMetricRepo.sensor("total_inflight_request"); static { totalInflightRequestSensor.add("total_inflight_request_count", new Rate()); @@ -126,9 +141,9 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private final Sensor errorRetryAttemptTriggeredByPendingRequestCheckSensor; private final String systemStoreName; - private final Attributes otelMetricDimensions; + private final Attributes commonMetricDimensions; private final boolean emitOpenTelemetryMetrics; - private final VeniceOpenTelemetryMetricFormat openTelemetryMetricFormat; + private final VeniceOpenTelemetryMetricNamingFormat openTelemetryMetricFormat; // QPS metrics public RouterHttpRequestStats( @@ -139,11 +154,11 @@ public RouterHttpRequestStats( ScatterGatherStats scatterGatherStats, boolean isKeyValueProfilingEnabled) { 
super(metricsRepository, storeName, requestType); - emitOpenTelemetryMetrics = metricsRepository.getVeniceMetricsConfig().isEmitOpenTelemetryMetrics(); - openTelemetryMetricFormat = metricsRepository.getVeniceMetricsConfig().getMetricFormat(); - otelMetricDimensions = Attributes.builder() + emitOpenTelemetryMetrics = metricsRepository.getVeniceMetricsConfig().emitOtelMetrics(); + openTelemetryMetricFormat = metricsRepository.getVeniceMetricsConfig().getMetricNamingFormat(); + commonMetricDimensions = Attributes.builder() .put(getDimensionName(VENICE_STORE_NAME), storeName) - .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.getRequestTypeName()) + .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.name().toLowerCase()) .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName) .build(); @@ -294,7 +309,7 @@ public void recordIncomingRequest() { inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet()); totalInflightRequestSensor.record(); if (emitOpenTelemetryMetrics) { - incomingRequestSensorOtel.add(1, otelMetricDimensions); + incomingRequestSensorOtel.add(1, commonMetricDimensions); } } @@ -363,7 +378,7 @@ public void recordErrorRetryCount() { public void recordRetryTriggeredSensorOtel(VeniceRequestRetryType retryType) { if (emitOpenTelemetryMetrics) { Attributes dimensions = Attributes.builder() - .putAll(otelMetricDimensions) + .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) .build(); retryTriggeredSensorOtel.add(1, dimensions); @@ -373,7 +388,7 @@ public void recordRetryTriggeredSensorOtel(VeniceRequestRetryType retryType) { public void recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason abortReason) { if (emitOpenTelemetryMetrics) { Attributes dimensions = Attributes.builder() - .putAll(otelMetricDimensions) + .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason()) .build(); abortedRetrySensorOtel.add(1, 
dimensions); @@ -412,11 +427,11 @@ public void recordLatencySensorOtel( VeniceResponseStatusCategory veniceResponseStatusCategory) { if (emitOpenTelemetryMetrics) { Attributes dimensions = Attributes.builder() - .putAll(otelMetricDimensions) + .putAll(commonMetricDimensions) // only add HTTP_RESPONSE_STATUS_CODE_CATEGORY to reduce the cardinality for histogram .put( getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), - VeniceHttpResponseStatusCodeCategory.valueOf(responseStatus.code()).getCategory()) + getVeniceHttpResponseStatusCodeCategory(responseStatus)) .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .build(); latencySensorOtel.record(latency, dimensions); @@ -428,10 +443,10 @@ public void recordRequestSensorOtel( VeniceResponseStatusCategory veniceResponseStatusCategory) { if (emitOpenTelemetryMetrics) { Attributes dimensions = Attributes.builder() - .putAll(otelMetricDimensions) + .putAll(commonMetricDimensions) .put( getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), - VeniceHttpResponseStatusCodeCategory.valueOf(responseStatus.code()).getCategory()) + getVeniceHttpResponseStatusCodeCategory(responseStatus)) .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) .build(); @@ -478,7 +493,7 @@ public void recordKeyCountSensorOtel(int keyNum, VeniceRequestValidationOutcome keyNumSensor.record(keyNum); if (emitOpenTelemetryMetrics) { Attributes dimensions = Attributes.builder() - .putAll(otelMetricDimensions) + .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome()) .build(); keyCountSensorOtel.record(keyNum, dimensions); @@ -542,12 +557,12 @@ public void recordResponse() { public void recordAllowedRetryRequest() { allowedRetryRequestSensor.record(); - allowedRetryRequestSensorOtel.add(1, 
otelMetricDimensions); + allowedRetryRequestSensorOtel.add(1, commonMetricDimensions); } public void recordDisallowedRetryRequest() { disallowedRetryRequestSensor.record(); - disallowedRetryRequestSensorOtel.add(1, otelMetricDimensions); + disallowedRetryRequestSensorOtel.add(1, commonMetricDimensions); } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { @@ -557,7 +572,7 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { public void recordRetryDelay(double delay) { retryDelaySensor.record(delay); if (emitOpenTelemetryMetrics) { - retryDelaySensorOtel.record(delay, otelMetricDimensions); + retryDelaySensorOtel.record(delay, commonMetricDimensions); } } diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java index 2af917f1bd..0551ed7b6b 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java @@ -7,6 +7,7 @@ import com.linkedin.venice.router.stats.AggRouterHttpRequestStats; import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; +import org.apache.commons.cli.MissingArgumentException; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.BeforeSuite; @@ -19,7 +20,7 @@ public class AggRouterHttpRequestStatsTest { private ReadOnlyStoreRepository storeMetadataRepository; @BeforeSuite - public void setUp() { + public void setUp() throws MissingArgumentException { this.metricsRepository = new VeniceMetricsRepository(); reporter = new MockTehutiReporter(); metricsRepository.addReporter(reporter); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java index fdfa96bb08..86e42b7dd8 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java @@ -10,6 +10,7 @@ import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; +import org.apache.commons.cli.MissingArgumentException; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; @@ -21,7 +22,7 @@ public class RouteHttpRequestStatsTest { private RouterHttpRequestStats routerHttpRequestStats; @BeforeSuite - public void setUp() { + public void setUp() throws MissingArgumentException { VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); reporter = new MockTehutiReporter(); metrics.addReporter(reporter); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java index 01377fc91b..55b60a648d 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java @@ -53,6 +53,7 @@ import java.util.Map; import java.util.Set; import javax.annotation.Nonnull; +import org.apache.commons.cli.MissingArgumentException; import org.apache.http.client.methods.HttpUriRequest; import org.mockito.ArgumentCaptor; import org.testng.Assert; @@ -222,14 +223,18 @@ private VenicePathParser getPathParser() { @BeforeClass public void setUp() { - RouterExceptionAndTrackingUtils.setRouterStats( - new RouterStats<>( - requestType -> new 
AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - "test-cluster", - requestType, - mock(ReadOnlyStoreRepository.class), - true))); + RouterExceptionAndTrackingUtils.setRouterStats(new RouterStats<>(requestType -> { + try { + return new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + "test-cluster", + requestType, + mock(ReadOnlyStoreRepository.class), + true); + } catch (MissingArgumentException e) { + throw new RuntimeException(e); + } + })); } @AfterClass @@ -375,16 +380,18 @@ public void testLeastLoadedOnSlowHosts() throws RouterException { VeniceRouterConfig config = mock(VeniceRouterConfig.class); doReturn(LEAST_LOADED_ROUTING).when(config).getMultiKeyRoutingStrategy(); - VeniceDelegateMode scatterMode = new VeniceDelegateMode( - config, - new RouterStats<>( - requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - "test-cluster", - requestType, - mock(ReadOnlyStoreRepository.class), - true)), - mock(RouteHttpRequestStats.class)); + VeniceDelegateMode scatterMode = new VeniceDelegateMode(config, new RouterStats<>(requestType -> { + try { + return new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + "test-cluster", + requestType, + mock(ReadOnlyStoreRepository.class), + true); + } catch (MissingArgumentException e) { + throw new RuntimeException(e); + } + }), mock(RouteHttpRequestStats.class)); scatterMode.initReadRequestThrottler(throttler); Scatter finalScatter = scatterMode @@ -613,7 +620,7 @@ public void testScatterWithStreamingMultiGet() throws RouterException { } @Test - public void testScatterForMultiGetWithHelixAssistedRouting() throws RouterException { + public void testScatterForMultiGetWithHelixAssistedRouting() throws RouterException, MissingArgumentException { String storeName = Utils.getUniqueString("test_store"); int version = 1; String resourceName = storeName + "_v" + version; diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java index 2ab4e77908..2ef0d0c291 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java @@ -52,6 +52,7 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.ScheduledExecutorService; +import org.apache.commons.cli.MissingArgumentException; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.AfterClass; @@ -97,14 +98,18 @@ RouterStats getMockedStats() { @BeforeClass public void setUp() { - RouterExceptionAndTrackingUtils.setRouterStats( - new RouterStats<>( - requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - CLUSTER, - requestType, - mock(ReadOnlyStoreRepository.class), - true))); + RouterExceptionAndTrackingUtils.setRouterStats(new RouterStats<>(requestType -> { + try { + return new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + CLUSTER, + requestType, + mock(ReadOnlyStoreRepository.class), + true); + } catch (MissingArgumentException e) { + throw new RuntimeException(e); + } + })); } @AfterClass diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java index f4100ff399..464b160010 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; +import org.apache.commons.cli.MissingArgumentException; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -39,16 
+40,23 @@ public class TestVeniceMultiGetPath { private final RetryManager disabledRetryManager = new RetryManager(new VeniceMetricsRepository(), "disabled-test-retry-manager", 0, 0, null); + public TestVeniceMultiGetPath() throws MissingArgumentException { + } + @BeforeClass public void setUp() { - RouterExceptionAndTrackingUtils.setRouterStats( - new RouterStats<>( - requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - "test-cluster", - requestType, - mock(ReadOnlyStoreRepository.class), - true))); + RouterExceptionAndTrackingUtils.setRouterStats(new RouterStats<>(requestType -> { + try { + return new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + "test-cluster", + requestType, + mock(ReadOnlyStoreRepository.class), + true); + } catch (MissingArgumentException e) { + throw new RuntimeException(e); + } + })); } @AfterClass diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java index eef224113d..04800c3ac3 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java @@ -21,6 +21,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; +import org.apache.commons.cli.MissingArgumentException; import org.apache.http.client.methods.HttpUriRequest; import org.testng.Assert; import org.testng.annotations.AfterClass; @@ -88,7 +89,7 @@ public String getLocation() { private final ScheduledExecutorService retryManagerScheduler = Executors.newScheduledThreadPool(1); @BeforeMethod - public void setUp() { + public void setUp() throws MissingArgumentException { metricsRepository = new VeniceMetricsRepository(); // retry manager is disabled by default disabledRetryManager = diff 
--git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java index 35364da6b1..2672eba9fb 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java @@ -6,13 +6,14 @@ import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; +import org.apache.commons.cli.MissingArgumentException; import org.testng.Assert; import org.testng.annotations.Test; public class AdminOperationsStatsTest { @Test - public void testAdminOperationsStats() { + public void testAdminOperationsStats() throws MissingArgumentException { VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); MockTehutiReporter reporter = new MockTehutiReporter(); metrics.addReporter(reporter); From ceb4c8e7e758c1c0ac220c1c80a660450e08a833 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Tue, 19 Nov 2024 00:40:46 -0800 Subject: [PATCH 03/19] address review comments part 2 --- .../AbstractVeniceAggVersionedStats.java | 6 +- .../stats/AggHostLevelIngestionStats.java | 7 +- .../stats/AggKafkaConsumerServiceStats.java | 7 +- .../stats/AggVersionedStorageEngineStats.java | 2 +- .../davinci/stats/DIVStatsReporter.java | 2 +- .../davinci/stats/IngestionStatsReporter.java | 2 +- .../davinci/stats/VeniceVersionedStats.java | 4 +- .../stats/VeniceVersionedStatsReporter.java | 10 +- .../heartbeat/HeartbeatMonitoringService.java | 5 +- .../heartbeat/HeartbeatVersionedStats.java | 4 +- .../davinci/stats/DIVStatsReporterTest.java | 2 +- .../venice/stats/AbstractVeniceAggStats.java | 46 ++--- ...ricsRepository.java => StatsSupplier.java} | 10 +- 
.../StatsSupplierVeniceMetricsRepository.java | 19 --- .../linkedin/venice/stats/TehutiUtils.java | 3 +- .../venice/stats/VeniceMetricsConfig.java | 57 ++++--- .../venice/stats/VeniceMetricsRepository.java | 158 +----------------- .../VeniceOpenTelemetryMetricsRepository.java | 25 ++- .../utils/metrics/MetricsRepositoryUtils.java | 5 +- .../venice/stats/VeniceMetricsConfigTest.java | 107 ++++++------ .../stats/VeniceMetricsRepositoryTest.java | 142 ++++------------ ...iceOpenTelemetryMetricsRepositoryTest.java | 3 +- .../venice/pushmonitor/PushHealthStats.java | 2 +- .../pushmonitor/PushStatusCleanUpStats.java | 2 +- .../stats/AbstractVeniceAggStoreStats.java | 6 +- .../utils/VeniceRouterWrapper.java | 30 ++-- .../router/api/TestVeniceDispatcher.java | 20 +-- .../stats/AggPartitionHealthStats.java | 4 +- .../stats/PartitionHealthStats.java | 2 +- .../linkedin/venice/router/RouterServer.java | 27 +-- .../router/stats/AggHostHealthStats.java | 4 +- .../stats/AggRouterHttpRequestStats.java | 2 +- .../router/stats/RouterHttpRequestStats.java | 19 +-- .../router/AggRouterHttpRequestStatsTest.java | 3 +- .../router/RouteHttpRequestStatsTest.java | 3 +- .../router/api/TestVeniceDelegateMode.java | 45 +++-- .../router/api/TestVenicePathParser.java | 21 +-- .../api/path/TestVeniceMultiGetPath.java | 23 +-- .../router/api/path/TestVenicePath.java | 3 +- .../stats/AdminOperationsStatsTest.java | 3 +- .../venice/stats/AggRocksDBStats.java | 2 +- .../stats/AggServerHttpRequestStats.java | 5 +- .../stats/AggServerQuotaUsageStats.java | 2 +- 43 files changed, 292 insertions(+), 562 deletions(-) rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/{StatsSupplierMetricsRepository.java => StatsSupplier.java} (57%) delete mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java 
b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java index 8bbd78c2bc..417da42f60 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java @@ -7,7 +7,7 @@ import com.linkedin.venice.meta.StoreDataChangedListener; import com.linkedin.venice.meta.Version; import com.linkedin.venice.meta.VersionStatus; -import com.linkedin.venice.stats.StatsSupplierMetricsRepository; +import com.linkedin.venice.stats.StatsSupplier; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.tehuti.metrics.MetricsRepository; @@ -25,7 +25,7 @@ public abstract class AbstractVeniceAggVersionedStats statsInitiator; - private final StatsSupplierMetricsRepository reporterSupplier; + private final StatsSupplier reporterSupplier; protected final ReadOnlyStoreRepository metadataRepository; private final MetricsRepository metricsRepository; @@ -37,7 +37,7 @@ public AbstractVeniceAggVersionedStats( MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, Supplier statsInitiator, - StatsSupplierMetricsRepository reporterSupplier, + StatsSupplier reporterSupplier, boolean unregisterMetricForDeletedStoreEnabled) { this.metadataRepository = metadataRepository; this.metricsRepository = metricsRepository; diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java index 3ea802cb66..a635f11809 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java @@ -5,7 +5,7 @@ import 
com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.StatsSupplierMetricsRepository; +import com.linkedin.venice.stats.StatsSupplier; import com.linkedin.venice.utils.Time; import io.tehuti.metrics.MetricsRepository; import java.util.Map; @@ -29,7 +29,7 @@ public AggHostLevelIngestionStats( unregisterMetricForDeletedStoreEnabled); } - static class HostLevelStoreIngestionStatsSupplier implements StatsSupplierMetricsRepository { + static class HostLevelStoreIngestionStatsSupplier implements StatsSupplier { private final VeniceServerConfig serverConfig; private final Map ingestionTaskMap; private final Time time; @@ -44,7 +44,7 @@ static class HostLevelStoreIngestionStatsSupplier implements StatsSupplierMetric } @Override - public HostLevelIngestionStats get(MetricsRepository metricsRepository, String storeName) { + public HostLevelIngestionStats get(MetricsRepository metricsRepository, String storeName, String clusterName) { throw new VeniceException("Should not be called."); } @@ -52,6 +52,7 @@ public HostLevelIngestionStats get(MetricsRepository metricsRepository, String s public HostLevelIngestionStats get( MetricsRepository metricsRepository, String storeName, + String clusterName, HostLevelIngestionStats totalStats) { return new HostLevelIngestionStats( metricsRepository, diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java index d4e6055154..8eaab3e6f4 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java @@ -3,7 +3,7 @@ import com.linkedin.venice.exceptions.VeniceException; import 
com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.StatsSupplierMetricsRepository; +import com.linkedin.venice.stats.StatsSupplier; import com.linkedin.venice.utils.SystemTime; import io.tehuti.metrics.MetricsRepository; import java.util.function.LongSupplier; @@ -99,7 +99,7 @@ public void recordTotalLatestOffsetIsPresent() { totalStats.recordLatestOffsetIsPresent(); } - static class KafkaConsumerServiceStatsSupplier implements StatsSupplierMetricsRepository { + static class KafkaConsumerServiceStatsSupplier implements StatsSupplier { private final LongSupplier getMaxElapsedTimeSinceLastPollInConsumerPool; KafkaConsumerServiceStatsSupplier(LongSupplier getMaxElapsedTimeSinceLastPollInConsumerPool) { @@ -107,7 +107,7 @@ static class KafkaConsumerServiceStatsSupplier implements StatsSupplierMetricsRe } @Override - public KafkaConsumerServiceStats get(MetricsRepository metricsRepository, String storeName) { + public KafkaConsumerServiceStats get(MetricsRepository metricsRepository, String storeName, String clusterName) { throw new VeniceException("Should not be called."); } @@ -115,6 +115,7 @@ public KafkaConsumerServiceStats get(MetricsRepository metricsRepository, String public KafkaConsumerServiceStats get( MetricsRepository metricsRepository, String storeName, + String clusterName, KafkaConsumerServiceStats totalStats) { return new KafkaConsumerServiceStats( metricsRepository, diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggVersionedStorageEngineStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggVersionedStorageEngineStats.java index 24be5eab84..166ff3fef3 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggVersionedStorageEngineStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggVersionedStorageEngineStats.java @@ -83,7 +83,7 @@ public void 
recordRocksDBOpenFailure() { } static class StorageEngineStatsReporter extends AbstractVeniceStatsReporter { - public StorageEngineStatsReporter(MetricsRepository metricsRepository, String storeName) { + public StorageEngineStatsReporter(MetricsRepository metricsRepository, String storeName, String clusterName) { super(metricsRepository, storeName); } diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/DIVStatsReporter.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/DIVStatsReporter.java index 4432917658..d48b867588 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/DIVStatsReporter.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/DIVStatsReporter.java @@ -14,7 +14,7 @@ * collection/visualization system. */ public class DIVStatsReporter extends AbstractVeniceStatsReporter { - public DIVStatsReporter(MetricsRepository metricsRepository, String storeName) { + public DIVStatsReporter(MetricsRepository metricsRepository, String storeName, String clusterName) { super(metricsRepository, storeName); } diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/IngestionStatsReporter.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/IngestionStatsReporter.java index e88f39e5c9..7993eaa171 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/IngestionStatsReporter.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/IngestionStatsReporter.java @@ -60,7 +60,7 @@ public class IngestionStatsReporter extends AbstractVeniceStatsReporter { private static final Logger LOGGER = LogManager.getLogger(IngestionStatsReporter.class); - public IngestionStatsReporter(MetricsRepository metricsRepository, String storeName) { + public IngestionStatsReporter(MetricsRepository metricsRepository, String storeName, String clusterName) { super(metricsRepository, storeName); } diff --git 
a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java index fcb1c24aa1..ab2e88f53e 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java @@ -1,6 +1,6 @@ package com.linkedin.davinci.stats; -import com.linkedin.venice.stats.StatsSupplierMetricsRepository; +import com.linkedin.venice.stats.StatsSupplier; import io.tehuti.metrics.MetricsRepository; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; @@ -25,7 +25,7 @@ public VeniceVersionedStats( MetricsRepository metricsRepository, String storeName, Supplier statsInitiator, - StatsSupplierMetricsRepository reporterSupplier) { + StatsSupplier reporterSupplier) { this.storeName = storeName; this.versionedStats = new Int2ObjectOpenHashMap<>(); this.reporters = new VeniceVersionedStatsReporter<>(metricsRepository, storeName, reporterSupplier); diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java index 4753733881..232b1c5860 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java @@ -4,7 +4,7 @@ import com.linkedin.venice.common.VeniceSystemStoreUtils; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.StatsSupplierMetricsRepository; +import com.linkedin.venice.stats.StatsSupplier; import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; @@ -22,7 +22,7 @@ public class VeniceVersionedStatsReporter statsSupplier) { + 
StatsSupplier statsSupplier) { super(metricsRepository, storeName); this.isSystemStore = VeniceSystemStoreUtils.isSystemStore(storeName); @@ -30,10 +30,10 @@ public VeniceVersionedStatsReporter( registerSensor("current_version", new AsyncGauge((ignored1, ignored2) -> currentVersion, "current_version")); registerSensor("future_version", new AsyncGauge((ignored1, ignored2) -> futureVersion, "future_version")); - this.currentStatsReporter = statsSupplier.get(metricsRepository, storeName + "_current"); + this.currentStatsReporter = statsSupplier.get(metricsRepository, storeName + "_current", (String) null); if (!isSystemStore) { - this.futureStatsReporter = statsSupplier.get(metricsRepository, storeName + "_future"); - this.totalStatsReporter = statsSupplier.get(metricsRepository, storeName + "_total"); + this.futureStatsReporter = statsSupplier.get(metricsRepository, storeName + "_future", (String) null); + this.totalStatsReporter = statsSupplier.get(metricsRepository, storeName + "_total", (String) null); } else { this.futureStatsReporter = null; this.totalStatsReporter = null; diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatMonitoringService.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatMonitoringService.java index 7fd9f177e5..5ce963a58f 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatMonitoringService.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatMonitoringService.java @@ -68,7 +68,10 @@ public HeartbeatMonitoringService( metricsRepository, metadataRepository, () -> new HeartbeatStat(new MetricConfig(), regionNames), - (aMetricsRepository, storeName) -> new HeartbeatStatReporter(aMetricsRepository, storeName, regionNames), + (aMetricsRepository, storeName, clusterName) -> new HeartbeatStatReporter( + aMetricsRepository, + storeName, + 
regionNames), leaderHeartbeatTimeStamps, followerHeartbeatTimeStamps); } diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java index a02bcd883f..7f3220c000 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java @@ -3,7 +3,7 @@ import com.linkedin.davinci.stats.AbstractVeniceAggVersionedStats; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.meta.Store; -import com.linkedin.venice.stats.StatsSupplierMetricsRepository; +import com.linkedin.venice.stats.StatsSupplier; import io.tehuti.metrics.MetricsRepository; import java.util.Map; import java.util.function.Supplier; @@ -17,7 +17,7 @@ public HeartbeatVersionedStats( MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, Supplier statsInitiator, - StatsSupplierMetricsRepository reporterSupplier, + StatsSupplier reporterSupplier, Map>>> leaderMonitors, Map>>> followerMonitors) { super(metricsRepository, metadataRepository, statsInitiator, reporterSupplier, true); diff --git a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/stats/DIVStatsReporterTest.java b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/stats/DIVStatsReporterTest.java index 1fd1632b5d..a5279bc6a9 100644 --- a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/stats/DIVStatsReporterTest.java +++ b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/stats/DIVStatsReporterTest.java @@ -19,7 +19,7 @@ public void testDIVReporterCanReport() { metricsRepository.addReporter(reporter); String storeName = Utils.getUniqueString("store"); - DIVStatsReporter divStatsReporter = new 
DIVStatsReporter(metricsRepository, storeName); + DIVStatsReporter divStatsReporter = new DIVStatsReporter(metricsRepository, storeName, null); assertEquals(reporter.query("." + storeName + "--success_msg.DIVStatsGauge").value(), (double) NULL_DIV_STATS.code); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java index 9255f18731..37c183ae78 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java @@ -10,39 +10,35 @@ public abstract class AbstractVeniceAggStats { protected T totalStats; protected final Map storeStats = new VeniceConcurrentHashMap<>(); - private StatsSupplierMetricsRepository statsFactoryMetricsRepository; - private StatsSupplierVeniceMetricsRepository statsFactoryVeniceMetricsRepository; + private StatsSupplier statsFactory; private final MetricsRepository metricsRepository; private String clusterName = null; - private AbstractVeniceAggStats( - MetricsRepository metricsRepository, - StatsSupplierMetricsRepository statsSupplier, - T totalStats) { + private AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier, T totalStats) { this.metricsRepository = metricsRepository; - this.statsFactoryMetricsRepository = statsSupplier; + this.statsFactory = statsSupplier; this.totalStats = totalStats; } private AbstractVeniceAggStats( VeniceMetricsRepository metricsRepository, - StatsSupplierVeniceMetricsRepository statsSupplier, + StatsSupplier statsSupplier, String clusterName, T totalStats) { this.metricsRepository = metricsRepository; - this.statsFactoryVeniceMetricsRepository = statsSupplier; + this.statsFactory = statsSupplier; this.clusterName = clusterName; this.totalStats = totalStats; } - public 
AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplierMetricsRepository statsSupplier) { - this(metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null)); + public AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier) { + this(metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null, null)); } public AbstractVeniceAggStats( - StatsSupplierVeniceMetricsRepository statsSupplier, VeniceMetricsRepository metricsRepository, + StatsSupplier statsSupplier, String clusterName) { this( metricsRepository, @@ -56,35 +52,25 @@ public AbstractVeniceAggStats(MetricsRepository metricsRepository, String cluste this.clusterName = clusterName; } - public void setStatsSupplier(StatsSupplierVeniceMetricsRepository statsSupplier) { - this.statsFactoryVeniceMetricsRepository = statsSupplier; - if (metricsRepository instanceof VeniceMetricsRepository) { - this.totalStats = - statsSupplier.get((VeniceMetricsRepository) metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null); - } + public void setStatsSupplier(StatsSupplier statsSupplier) { + this.statsFactory = statsSupplier; + this.totalStats = statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null); } public AbstractVeniceAggStats( - String clusterName, MetricsRepository metricsRepository, - StatsSupplierMetricsRepository statsSupplier) { + StatsSupplier statsSupplier, + String clusterName) { this( metricsRepository, statsSupplier, - statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT + "." + clusterName, null)); + statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT + "." 
+ clusterName, clusterName, null)); this.clusterName = clusterName; } public T getStoreStats(String storeName) { - if (metricsRepository instanceof VeniceMetricsRepository) { - return storeStats.computeIfAbsent( - storeName, - k -> statsFactoryVeniceMetricsRepository - .get((VeniceMetricsRepository) metricsRepository, storeName, clusterName, totalStats)); - } else { - return storeStats - .computeIfAbsent(storeName, k -> statsFactoryMetricsRepository.get(metricsRepository, storeName, totalStats)); - } + return storeStats + .computeIfAbsent(storeName, k -> statsFactory.get(metricsRepository, storeName, clusterName, totalStats)); } public T getNullableStoreStats(String storeName) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java similarity index 57% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java index 208fc4470e..f42c97b03e 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java @@ -3,19 +3,19 @@ import io.tehuti.metrics.MetricsRepository; -public interface StatsSupplierMetricsRepository { +public interface StatsSupplier { /** * Legacy function, for implementations that do not use total stats in their constructor. * - * @see #get(MetricsRepository, String, AbstractVeniceStats) which is the only caller. + * @see #get(MetricsRepository, String, String, AbstractVeniceStats) which is the only caller. 
*/ - T get(MetricsRepository metricsRepository, String storeName); + T get(MetricsRepository metricsRepository, String storeName, String clusterName); /** * This is the function that gets called by {@link AbstractVeniceAggStats}, and concrete classes can * optionally implement it in order to be provided with the total stats instance. */ - default T get(MetricsRepository metricsRepository, String storeName, T totalStats) { - return get(metricsRepository, storeName); + default T get(MetricsRepository metricsRepository, String storeName, String clusterName, T totalStats) { + return get(metricsRepository, storeName, clusterName); } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java deleted file mode 100644 index c604515b89..0000000000 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.linkedin.venice.stats; - -/** copy of {@link StatsSupplierMetricsRepository} for {@link VeniceMetricsRepository} */ -public interface StatsSupplierVeniceMetricsRepository { - /** - * Legacy function, for implementations that do not use total stats in their constructor. - * - * @see #get(VeniceMetricsRepository, String, String, AbstractVeniceStats) which is the only caller. - */ - T get(VeniceMetricsRepository metricsRepository, String storeName, String clusterName); - - /** - * This is the function that gets called by {@link AbstractVeniceAggStats}, and concrete classes can - * optionally implement it in order to be provided with the total stats instance. 
- */ - default T get(VeniceMetricsRepository metricsRepository, String storeName, String clusterName, T totalStats) { - return get(metricsRepository, storeName, clusterName); - } -} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java index 86909d3ef1..513d60a63d 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java @@ -10,7 +10,6 @@ import io.tehuti.metrics.stats.Rate; import java.util.Arrays; import java.util.Map; -import org.apache.commons.cli.MissingArgumentException; /** @@ -134,7 +133,7 @@ public static MetricsRepository getMetricsRepository(String serviceName) { public static VeniceMetricsRepository getVeniceMetricsRepository( String serviceName, String metricPrefix, - Map configs) throws MissingArgumentException { + Map configs) { VeniceMetricsRepository metricsRepository = new VeniceMetricsRepository( new VeniceMetricsConfig.Builder().setServiceName(serviceName) .setMetricPrefix(metricPrefix) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index e0a4fb2ae8..4c2ca5dab5 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -10,13 +10,28 @@ import java.util.HashMap; import java.util.Locale; import java.util.Map; -import org.apache.commons.cli.MissingArgumentException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; public class VeniceMetricsConfig { private static final Logger LOGGER = LogManager.getLogger(VeniceMetricsConfig.class); + // create 
constants for all the configs + public static final String OTEL_VENICE_ENABLED = "otel.venice.enabled"; + public static final String OTEL_VENICE_METRICS_NAMING_FORMAT = "otel.venice.metrics.naming.format"; + public static final String OTEL_VENICE_EXPORT_TO_LOG = "otel.venice.export.to.log"; + public static final String OTEL_VENICE_EXPORT_TO_ENDPOINT = "otel.venice.export.to.endpoint"; + public static final String OTEL_EXPORTER_OTLP_METRICS_PROTOCOL = "otel.exporter.otlp.metrics.protocol"; + public static final String OTEL_EXPORTER_OTLP_METRICS_ENDPOINT = "otel.exporter.otlp.metrics.endpoint"; + public static final String OTEL_EXPORTER_OTLP_METRICS_HEADERS = "otel.exporter.otlp.metrics.headers"; + public static final String OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = + "otel.exporter.otlp.metrics.temporality.preference"; + public static final String OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION = + "otel.exporter.otlp.metrics.default.histogram.aggregation"; + public static final String OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE = + "otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale"; + public static final String OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS = + "otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets"; private final String serviceName; private final String metricPrefix; /** reusing tehuti's MetricConfig */ @@ -73,7 +88,7 @@ private VeniceMetricsConfig(Builder builder) { } public static class Builder { - private String serviceName = null; + private String serviceName = "default_service"; private String metricPrefix = null; private boolean emitOtelMetrics = false; private boolean exportOtelMetricsToEndpoint = false; @@ -143,27 +158,27 @@ public Builder setOtelHistogramAggregationSelector(DefaultAggregationSelector ot */ public Builder extractAndSetOtelConfigs(Map configs) { String configValue; - if ((configValue = configs.get("otel.venice.enabled")) != 
null) { - setEmitOtelMetrics(configValue.toLowerCase(Locale.ROOT).equals("true")); + if ((configValue = configs.get(OTEL_VENICE_ENABLED)) != null) { + setEmitOtelMetrics(Boolean.parseBoolean(configValue)); } - if ((configValue = configs.get("otel.venice.export.to.log")) != null) { - setExportOtelMetricsToLog(configValue.toLowerCase(Locale.ROOT).equals("true")); + if ((configValue = configs.get(OTEL_VENICE_EXPORT_TO_LOG)) != null) { + setExportOtelMetricsToLog(Boolean.parseBoolean(configValue)); } - if ((configValue = configs.get("otel.venice.export.to.endpoint")) != null) { - setExportOtelMetricsToEndpoint(configValue.toLowerCase(Locale.ROOT).equals("true")); + if ((configValue = configs.get(OTEL_VENICE_EXPORT_TO_ENDPOINT)) != null) { + setExportOtelMetricsToEndpoint(Boolean.parseBoolean(configValue)); } - if ((configValue = configs.get("otel.exporter.otlp.metrics.protocol")) != null) { + if ((configValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL)) != null) { setOtelExportProtocol(configValue); } - if ((configValue = configs.get("otel.venice.metrics.format")) != null) { + if ((configValue = configs.get(OTEL_VENICE_METRICS_NAMING_FORMAT)) != null) { setMetricNamingFormat(VeniceOpenTelemetryMetricNamingFormat.valueOf(configValue.toUpperCase(Locale.ROOT))); } - if ((configValue = configs.get("otel.exporter.otlp.metrics.endpoint")) != null) { + if ((configValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT)) != null) { // validate endpoint: TODO setOtelEndpoint(configValue); } @@ -174,12 +189,12 @@ public Builder extractAndSetOtelConfigs(Map configs) { * * Currently supporting 1 header */ - if ((configValue = configs.get("otel.exporter.otlp.metrics.headers")) != null) { + if ((configValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_HEADERS)) != null) { String[] headers = configValue.split("="); otelHeaders.put(headers[0], headers[1]); } - if ((configValue = configs.get("otel.exporter.otlp.metrics.temporality.preference")) != null) { + if ((configValue = 
configs.get(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE)) != null) { switch (configValue.toLowerCase(Locale.ROOT)) { case "cumulative": setOtelAggregationTemporalitySelector(AggregationTemporalitySelector.alwaysCumulative()); @@ -195,11 +210,11 @@ public Builder extractAndSetOtelConfigs(Map configs) { } } - if ((configValue = configs.get("otel.exporter.otlp.metrics.default.histogram.aggregation")) != null) { + if ((configValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION)) != null) { switch (configValue.toLowerCase(Locale.ROOT)) { case "base2_exponential_bucket_histogram": - String maxScaleValue = configs.get("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale"); - String maxBucketValue = configs.get("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets"); + String maxScaleValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE); + String maxBucketValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS); if (maxScaleValue != null && maxBucketValue != null) { int maxScale = Integer.parseInt(maxScaleValue); int maxBuckets = Integer.parseInt(maxBucketValue); @@ -238,15 +253,11 @@ public Builder setTehutiMetricConfig(MetricConfig tehutiMetricConfig) { } // Validate required fields before building - private void checkAndSetDefaults() throws MissingArgumentException { + private void checkAndSetDefaults() { if (tehutiMetricConfig == null) { setTehutiMetricConfig(new MetricConfig()); } - if (serviceName == null) { - throw new MissingArgumentException("serviceName is required to configure OpenTelemetry"); - } - if (metricPrefix == null) { LOGGER.warn("metricPrefix is not set. 
Defaulting to empty string"); setMetricPrefix(""); @@ -255,7 +266,7 @@ private void checkAndSetDefaults() throws MissingArgumentException { if (emitOtelMetrics) { if (exportOtelMetricsToEndpoint) { if (otelEndpoint == null) { - throw new MissingArgumentException("endpoint is required to configure OpenTelemetry metrics export"); + throw new IllegalArgumentException("endpoint is required to configure OpenTelemetry metrics export"); } } else { @@ -266,7 +277,7 @@ private void checkAndSetDefaults() throws MissingArgumentException { } } - public VeniceMetricsConfig build() throws MissingArgumentException { + public VeniceMetricsConfig build() { checkAndSetDefaults(); return new VeniceMetricsConfig(this); } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index 6d2d899441..3d94be4e49 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -1,34 +1,22 @@ package com.linkedin.venice.stats; -import io.tehuti.Metric; -import io.tehuti.metrics.Measurable; -import io.tehuti.metrics.MetricConfig; -import io.tehuti.metrics.MetricsReporter; import io.tehuti.metrics.MetricsRepository; -import io.tehuti.metrics.Sensor; -import io.tehuti.metrics.stats.AsyncGauge; import java.io.Closeable; -import java.util.Map; -import java.util.Objects; -import org.apache.commons.cli.MissingArgumentException; /** extends MetricsRepository to keep the changes to a minimum. 
Next step would be to create a MetricsRepository inside rather than extending it */ public class VeniceMetricsRepository extends MetricsRepository implements Closeable { - private MetricsRepository delegate = null; private VeniceMetricsConfig veniceMetricsConfig; VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; - public VeniceMetricsRepository() throws MissingArgumentException { + public VeniceMetricsRepository() { super(); this.veniceMetricsConfig = new VeniceMetricsConfig.Builder().build(); this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); } public VeniceMetricsRepository(VeniceMetricsConfig veniceMetricsConfig) { - super(veniceMetricsConfig.getTehutiMetricConfig()); - this.veniceMetricsConfig = veniceMetricsConfig; - this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + this(veniceMetricsConfig, new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig)); } public VeniceMetricsRepository( @@ -39,22 +27,6 @@ public VeniceMetricsRepository( this.openTelemetryMetricsRepository = openTelemetryMetricsRepository; } - /** if MetricsRepository is passed in, then use it as the delegate, can be removed after the migration */ - public VeniceMetricsRepository(MetricsRepository metricsRepository, VeniceMetricsConfig veniceMetricsConfig) { - this.delegate = Objects.requireNonNull(metricsRepository); - this.veniceMetricsConfig = veniceMetricsConfig; - this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); - } - - public VeniceMetricsRepository( - MetricsRepository metricsRepository, - VeniceMetricsConfig veniceMetricsConfig, - VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository) { - this.delegate = Objects.requireNonNull(metricsRepository); - this.veniceMetricsConfig = veniceMetricsConfig; - this.openTelemetryMetricsRepository = openTelemetryMetricsRepository; - } - public 
VeniceOpenTelemetryMetricsRepository getOpenTelemetryMetricsRepository() { return this.openTelemetryMetricsRepository; } @@ -65,129 +37,9 @@ public VeniceMetricsConfig getVeniceMetricsConfig() { @Override public void close() { - if (delegate != null) { - delegate.close(); - } else { - super.close(); - } - openTelemetryMetricsRepository.close(); - } - - // all other overrides from MetricsRepository to use delegate - @Override - public Sensor getSensor(String name) { - if (delegate != null) { - return delegate.getSensor(name); - } else { - return super.getSensor(name); - } - } - - @Override - public Sensor sensor(String name) { - if (delegate != null) { - return delegate.sensor(name); - } else { - return super.sensor(name); - } - } - - @Override - public Sensor sensor(String name, Sensor... parents) { - if (delegate != null) { - return delegate.sensor(name, parents); - } else { - return super.sensor(name, parents); - } - } - - @Override - public synchronized Sensor sensor(String name, MetricConfig config, Sensor... 
parents) { - if (delegate != null) { - return delegate.sensor(name, config, parents); - } else { - return super.sensor(name, config, parents); - } - } - - @Override - public synchronized void removeSensor(String name) { - if (delegate != null) { - delegate.removeSensor(name); - } else { - super.removeSensor(name); - } - } - - @Override - public Metric addMetric(String name, Measurable measurable) { - if (delegate != null) { - return delegate.addMetric(name, measurable); - } else { - return super.addMetric(name, measurable); - } - } - - @Override - public Metric addMetric(String name, String description, Measurable measurable) { - if (delegate != null) { - return delegate.addMetric(name, description, measurable); - } else { - return super.addMetric(name, description, measurable); - } - } - - @Override - public Metric addMetric(String name, MetricConfig config, Measurable measurable) { - if (delegate != null) { - return delegate.addMetric(name, config, measurable); - } else { - return super.addMetric(name, config, measurable); - } - } - - @Override - public synchronized Metric addMetric(String name, String description, MetricConfig config, Measurable measurable) { - if (delegate != null) { - return delegate.addMetric(name, description, config, measurable); - } else { - return super.addMetric(name, description, config, measurable); - } - } - - @Override - public synchronized void addReporter(MetricsReporter reporter) { - if (delegate != null) { - delegate.addReporter(reporter); - } else { - super.addReporter(reporter); - } - } - - @Override - public Map metrics() { - if (delegate != null) { - return delegate.metrics(); - } else { - return super.metrics(); - } - } - - @Override - public Metric getMetric(String name) { - if (delegate != null) { - return delegate.getMetric(name); - } else { - return super.getMetric(name); - } - } - - @Override - public AsyncGauge.AsyncGaugeExecutor getAsyncGaugeExecutor() { - if (delegate != null) { - return 
delegate.getAsyncGaugeExecutor(); - } else { - return super.getAsyncGaugeExecutor(); + super.close(); + if (openTelemetryMetricsRepository != null) { + openTelemetryMetricsRepository.close(); } } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index b892ee0203..478c3738bf 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -2,7 +2,6 @@ import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.transformMetricName; import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; -import static io.opentelemetry.sdk.metrics.data.AggregationTemporality.DELTA; import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; @@ -16,6 +15,7 @@ import io.opentelemetry.exporter.otlp.http.metrics.OtlpHttpMetricExporterBuilder; import io.opentelemetry.sdk.OpenTelemetrySdk; import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.metrics.Aggregation; import io.opentelemetry.sdk.metrics.InstrumentType; import io.opentelemetry.sdk.metrics.SdkMeterProvider; import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder; @@ -86,7 +86,7 @@ public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { } if (metricsConfig.exportOtelMetricsToLog()) { // internal to test: Disabled by default - builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter()).build()); + builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter(metricsConfig)).build()); } builder.setResource(Resource.empty()); @@ -156,14 
+156,27 @@ public LongCounter getCounter(String name, String unit, String description) { public void close() { LOGGER.info("OpenTelemetry close"); - sdkMeterProvider.shutdown(); - sdkMeterProvider = null; + if (sdkMeterProvider != null) { + sdkMeterProvider.shutdown(); + sdkMeterProvider = null; + } } - static class LogBasedMetricExporter implements MetricExporter { + class LogBasedMetricExporter implements MetricExporter { + VeniceMetricsConfig metricsConfig; + + LogBasedMetricExporter(VeniceMetricsConfig metricsConfig) { + this.metricsConfig = metricsConfig; + } + @Override public AggregationTemporality getAggregationTemporality(InstrumentType instrumentType) { - return DELTA; + return metricsConfig.getOtelAggregationTemporalitySelector().getAggregationTemporality(instrumentType); + } + + @Override + public Aggregation getDefaultAggregation(InstrumentType instrumentType) { + return metricsConfig.getOtelHistogramAggregationSelector().getDefaultAggregation(instrumentType); } @Override diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java index 70a51b9f1a..afca2893ac 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java @@ -6,7 +6,6 @@ import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; import java.util.concurrent.TimeUnit; -import org.apache.commons.cli.MissingArgumentException; /** @@ -23,7 +22,7 @@ public static MetricsRepository createSingleThreadedMetricsRepository() { return createSingleThreadedMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } - public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() throws MissingArgumentException { + public static 
VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() { return createSingleThreadedVeniceMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } @@ -41,7 +40,7 @@ public static MetricsRepository createSingleThreadedMetricsRepository( public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository( long maxMetricsMeasurementTimeoutMs, - long initialMetricsMeasurementTimeoutMs) throws MissingArgumentException { + long initialMetricsMeasurementTimeoutMs) { MetricConfig tehutiMetricsConfig = new MetricConfig( new AsyncGauge.AsyncGaugeExecutor.Builder().setMetricMeasurementThreadCount(1) .setSlowMetricMeasurementThreadCount(1) diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java index bd1232db85..021bf78301 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -1,5 +1,15 @@ package com.linkedin.venice.stats; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_ENABLED; +import static 
com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_ENDPOINT; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_LOG; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_NAMING_FORMAT; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -12,18 +22,17 @@ import io.tehuti.metrics.MetricConfig; import java.util.HashMap; import java.util.Map; -import org.apache.commons.cli.MissingArgumentException; import org.testng.annotations.Test; public class VeniceMetricsConfigTest { - @Test(expectedExceptions = MissingArgumentException.class) - public void testDefaultValuesThrowsException() throws MissingArgumentException { + @Test + public void testDefaultValues() { new Builder().build(); } @Test - public void testDefaultValuesWithBasicConfig() throws MissingArgumentException { + public void testDefaultValuesWithBasicConfig() { VeniceMetricsConfig config = new Builder().setServiceName("noop_service").setMetricPrefix("service").build(); assertEquals(config.getServiceName(), "noop_service"); assertEquals(config.getMetricPrefix(), "service"); @@ -40,10 +49,10 @@ public void testDefaultValuesWithBasicConfig() throws MissingArgumentException { } @Test - public void testCustomValues() throws MissingArgumentException { + public void testCustomValues() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.log", "true"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_LOG, "true"); MetricConfig metricConfig = new MetricConfig(); @@ -60,11 +69,11 @@ public void testCustomValues() throws MissingArgumentException { assertEquals(config.getTehutiMetricConfig(), metricConfig); } - @Test(expectedExceptions = MissingArgumentException.class) - public void testOtelMissingConfigs() throws MissingArgumentException { + 
@Test(expectedExceptions = IllegalArgumentException.class) + public void testOtelMissingConfigs() { Map invalidOtelConfigs = new HashMap<>(); - invalidOtelConfigs.put("otel.venice.enabled", "true"); - invalidOtelConfigs.put("otel.venice.export.to.endpoint", "true"); + invalidOtelConfigs.put(OTEL_VENICE_ENABLED, "true"); + invalidOtelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -73,18 +82,18 @@ public void testOtelMissingConfigs() throws MissingArgumentException { } @Test(expectedExceptions = IllegalArgumentException.class) - public void testOtelConfigWithInvalidMetricFormat() throws MissingArgumentException { + public void testOtelConfigWithInvalidMetricFormat() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.metrics.format", "INVALID_FORMAT"); + otelConfigs.put(OTEL_VENICE_METRICS_NAMING_FORMAT, "INVALID_FORMAT"); new Builder().extractAndSetOtelConfigs(otelConfigs).build(); } @Test - public void testOtelConfigWithValidMetricFormat() throws MissingArgumentException { + public void testOtelConfigWithValidMetricFormat() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.metrics.format", "CAMEL_CASE"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_NAMING_FORMAT, "CAMEL_CASE"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -95,12 +104,12 @@ public void testOtelConfigWithValidMetricFormat() throws MissingArgumentExceptio } @Test - public void testEnableHttpGrpcEndpointConfigWithRequiredFields() throws MissingArgumentException { + public void testEnableHttpGrpcEndpointConfigWithRequiredFields() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.endpoint", "true"); - otelConfigs.put("otel.exporter.otlp.metrics.protocol", 
OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); - otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -113,13 +122,13 @@ public void testEnableHttpGrpcEndpointConfigWithRequiredFields() throws MissingA } @Test - public void testSetAggregationTemporalitySelector() throws MissingArgumentException { + public void testSetAggregationTemporalitySelector() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.endpoint", "true"); - otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); - otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); - otelConfigs.put("otel.exporter.otlp.metrics.temporality.preference", "delta"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE, "delta"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -129,13 +138,13 @@ public void testSetAggregationTemporalitySelector() throws MissingArgumentExcept } @Test(expectedExceptions = IllegalArgumentException.class) - public void testSetAggregationTemporalitySelectorInvalidConfig() throws MissingArgumentException { + public void testSetAggregationTemporalitySelectorInvalidConfig() { Map otelConfigs = new HashMap<>(); - 
otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.endpoint", "true"); - otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); - otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); - otelConfigs.put("otel.exporter.otlp.metrics.temporality.preference", "invalid"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE, "invalid"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -145,15 +154,15 @@ public void testSetAggregationTemporalitySelectorInvalidConfig() throws MissingA } @Test - public void testSetHistogramAggregationSelector() throws MissingArgumentException { + public void testSetHistogramAggregationSelector() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.endpoint", "true"); - otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); - otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); - otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation", "base2_exponential_bucket_histogram"); - otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale", "10"); - otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets", "50"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); + 
otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "base2_exponential_bucket_histogram"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE, "10"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS, "50"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -165,15 +174,15 @@ public void testSetHistogramAggregationSelector() throws MissingArgumentExceptio } @Test(expectedExceptions = IllegalArgumentException.class) - public void testSetHistogramAggregationSelectorInvalidConfig() throws MissingArgumentException { + public void testSetHistogramAggregationSelectorInvalidConfig() { Map otelConfigs = new HashMap<>(); - otelConfigs.put("otel.venice.enabled", "true"); - otelConfigs.put("otel.venice.export.to.endpoint", "true"); - otelConfigs.put("otel.exporter.otlp.metrics.protocol", OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); - otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); - otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation", "invalid"); - otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale", "10"); - otelConfigs.put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets", "50"); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "invalid"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE, "10"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS, "50"); new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") diff --git 
a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java index 940ee20b0e..70f3e50702 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java @@ -1,133 +1,51 @@ package com.linkedin.venice.stats; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; -import io.tehuti.Metric; -import io.tehuti.metrics.Measurable; -import io.tehuti.metrics.MetricsReporter; -import io.tehuti.metrics.MetricsRepository; -import io.tehuti.metrics.Sensor; -import io.tehuti.metrics.stats.AsyncGauge; -import java.util.Map; -import org.apache.commons.cli.MissingArgumentException; +import io.tehuti.metrics.MetricConfig; import org.mockito.Mockito; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class VeniceMetricsRepositoryTest { - private VeniceMetricsRepository metricsRepository; - private VeniceOpenTelemetryMetricsRepository mockOpenTelemetryMetricsRepository; - private MetricsRepository mockDelegate; - - @BeforeMethod - public void setUp() throws MissingArgumentException { - VeniceMetricsConfig config = - new VeniceMetricsConfig.Builder().setServiceName("test").setMetricPrefix("test").build(); - mockOpenTelemetryMetricsRepository = Mockito.mock(VeniceOpenTelemetryMetricsRepository.class); - mockDelegate = Mockito.mock(MetricsRepository.class); - metricsRepository = new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); - } - - @AfterMethod - public void tearDown() { - metricsRepository.close(); - } - - @Test - public void testConstructorWithDelegateAndConfig() throws MissingArgumentException { - VeniceMetricsConfig 
config = - new VeniceMetricsConfig.Builder().setServiceName("test").setMetricPrefix("test").build(); - VeniceMetricsRepository repo = - new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); - - assertEquals(repo.getVeniceMetricsConfig(), config); - assertEquals(repo.getOpenTelemetryMetricsRepository(), mockOpenTelemetryMetricsRepository); - } - @Test - public void testCloseWithDelegate() { - metricsRepository.close(); - Mockito.verify(mockDelegate, Mockito.times(1)).close(); - Mockito.verify(mockOpenTelemetryMetricsRepository, Mockito.times(1)).close(); + public void testDefaultConstructor() throws Exception { + VeniceMetricsRepository repository = new VeniceMetricsRepository(); + assertNotNull(repository.getVeniceMetricsConfig(), "VeniceMetricsConfig should not be null."); + assertNotNull(repository.getOpenTelemetryMetricsRepository(), "OpenTelemetryMetricsRepository should not be null."); + repository.close(); } @Test - public void testAddMetricDelegation() { - Measurable measurable = Mockito.mock(Measurable.class); - Metric metric = Mockito.mock(Metric.class); - - Mockito.when(mockDelegate.addMetric("testMetric", measurable)).thenReturn(metric); - - Metric returnedMetric = metricsRepository.addMetric("testMetric", measurable); - assertEquals(returnedMetric, metric); - - Mockito.verify(mockDelegate, Mockito.times(1)).addMetric("testMetric", measurable); - } - - @Test - public void testGetSensorDelegation() { - Sensor sensor = Mockito.mock(Sensor.class); - Mockito.when(mockDelegate.getSensor("testSensor")).thenReturn(sensor); - - Sensor returnedSensor = metricsRepository.getSensor("testSensor"); - assertEquals(returnedSensor, sensor); - - Mockito.verify(mockDelegate, Mockito.times(1)).getSensor("testSensor"); - } - - @Test - public void testMetricsRetrieval() { - Map mockMetrics = Mockito.mock(Map.class); - Mockito.doReturn(mockMetrics).when(mockDelegate).metrics(); - - Map retrievedMetrics = metricsRepository.metrics(); - 
assertEquals(retrievedMetrics, mockMetrics); - - Mockito.verify(mockDelegate, Mockito.times(1)).metrics(); - } - - @Test - public void testGetMetricDelegation() { - Metric metric = Mockito.mock(Metric.class); - Mockito.when(mockDelegate.getMetric("testMetric")).thenReturn(metric); - - Metric retrievedMetric = metricsRepository.getMetric("testMetric"); - assertEquals(retrievedMetric, metric); - - Mockito.verify(mockDelegate, Mockito.times(1)).getMetric("testMetric"); - } - - @Test - public void testAddReporterDelegation() { - MetricsReporter mockReporter = Mockito.mock(MetricsReporter.class); - - metricsRepository.addReporter(mockReporter); - Mockito.verify(mockDelegate, Mockito.times(1)).addReporter(mockReporter); - } - - @Test - public void testAsyncGaugeExecutorDelegation() { - AsyncGauge.AsyncGaugeExecutor asyncGaugeExecutor = Mockito.mock(AsyncGauge.AsyncGaugeExecutor.class); - Mockito.when(mockDelegate.getAsyncGaugeExecutor()).thenReturn(asyncGaugeExecutor); - - AsyncGauge.AsyncGaugeExecutor executor = metricsRepository.getAsyncGaugeExecutor(); - assertEquals(executor, asyncGaugeExecutor); + public void testConstructorWithAllParameters() { + VeniceMetricsConfig metricsConfig = new VeniceMetricsConfig.Builder().build(); + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository = + new VeniceOpenTelemetryMetricsRepository(metricsConfig); + VeniceMetricsRepository repository = new VeniceMetricsRepository(metricsConfig, openTelemetryMetricsRepository); - Mockito.verify(mockDelegate, Mockito.times(1)).getAsyncGaugeExecutor(); + assertEquals( + repository.getVeniceMetricsConfig(), + metricsConfig, + "VeniceMetricsConfig should match the provided config."); + assertEquals( + repository.getOpenTelemetryMetricsRepository(), + openTelemetryMetricsRepository, + "OpenTelemetryMetricsRepository should match the provided instance."); + repository.close(); } @Test - public void testSensorCreationAndDeletionWithDelegate() { - Sensor mockSensor = 
Mockito.mock(Sensor.class); - Mockito.when(mockDelegate.sensor("testSensor")).thenReturn(mockSensor); + public void testCloseMethod() { + VeniceMetricsConfig mockConfig = Mockito.mock(VeniceMetricsConfig.class); + VeniceOpenTelemetryMetricsRepository mockOpenTelemetryRepository = + Mockito.mock(VeniceOpenTelemetryMetricsRepository.class); + Mockito.when(mockConfig.getTehutiMetricConfig()).thenReturn(new MetricConfig()); - Sensor sensor = metricsRepository.sensor("testSensor"); - assertEquals(sensor, mockSensor); + VeniceMetricsRepository repository = new VeniceMetricsRepository(mockConfig, mockOpenTelemetryRepository); + repository.close(); - metricsRepository.removeSensor("testSensor"); - Mockito.verify(mockDelegate, Mockito.times(1)).sensor("testSensor"); - Mockito.verify(mockDelegate, Mockito.times(1)).removeSensor("testSensor"); + // Verify that close methods are called + Mockito.verify(mockOpenTelemetryRepository).close(); } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java index b6cb77b13b..f9ec91df5e 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -1,5 +1,6 @@ package com.linkedin.venice.stats; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.transformMetricName; import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import static org.testng.Assert.assertEquals; @@ -64,7 +65,7 @@ public void testConstructorWithEmitDisabled() { @Test public void testGetOtlpHttpMetricExporterWithValidConfig() { 
HashMap otelConfigs = new HashMap<>(); - otelConfigs.put("otel.exporter.otlp.endpoint", "http://localhost:4318"); + otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost:4318"); MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushHealthStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushHealthStats.java index a552678d23..c6cf6f405d 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushHealthStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushHealthStats.java @@ -15,7 +15,7 @@ public class PushHealthStats extends AbstractVeniceStats { private final Sensor successfulPushDurationSensorGauge; - public PushHealthStats(MetricsRepository metricsRepository, String storeName) { + public PushHealthStats(MetricsRepository metricsRepository, String storeName, String clusterName) { super(metricsRepository, storeName); failedPushDurationSensor = registerSensorIfAbsent("failed_push_duration_sec", new Avg(), new Max()); successfulPushDurationSensor = registerSensorIfAbsent("successful_push_duration_sec", new Avg(), new Max()); diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushStatusCleanUpStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushStatusCleanUpStats.java index 35b4819775..698ba21afa 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushStatusCleanUpStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/PushStatusCleanUpStats.java @@ -13,7 +13,7 @@ public class PushStatusCleanUpStats extends AbstractVeniceStats { private final Sensor leakedPushStatusCleanUpServiceStateSensor; private final Sensor leakedPushStatusCountSensor; - public PushStatusCleanUpStats(MetricsRepository metricsRepository, String 
storeName) { + public PushStatusCleanUpStats(MetricsRepository metricsRepository, String storeName, String clusterName) { super(metricsRepository, storeName); leakedPushStatusCountSensor = registerSensorIfAbsent("leaked_push_status_count", new Gauge()); failedLeakedPushStatusCleanUpCountSensor = diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java index 85b335f2ce..2a7662a094 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java @@ -18,17 +18,17 @@ public class AbstractVeniceAggStoreStats extends public AbstractVeniceAggStoreStats( String clusterName, MetricsRepository metricsRepository, - StatsSupplierMetricsRepository statsSupplier, + StatsSupplier statsSupplier, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - super(clusterName, metricsRepository, statsSupplier); + super(metricsRepository, statsSupplier, clusterName); this.isUnregisterMetricForDeletedStoreEnabled = isUnregisterMetricForDeletedStoreEnabled; registerStoreDataChangedListenerIfRequired(metadataRepository); } public AbstractVeniceAggStoreStats( MetricsRepository metricsRepository, - StatsSupplierMetricsRepository statsSupplier, + StatsSupplier statsSupplier, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { super(metricsRepository, statsSupplier); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index 39f4cc85dc..92a891b72b 100644 --- 
a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -22,6 +22,15 @@ import static com.linkedin.venice.ConfigKeys.ZOOKEEPER_ADDRESS; import static com.linkedin.venice.VeniceConstants.DEFAULT_PER_ROUTER_READ_QUOTA; import static com.linkedin.venice.integration.utils.VeniceClusterWrapperConstants.ROUTER_PORT_TO_USE_IN_VENICE_ROUTER_WRAPPER; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_ENABLED; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_ENDPOINT; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_LOG; import com.linkedin.venice.client.store.ClientConfig; import com.linkedin.venice.helix.HelixBaseRoutingRepository; @@ -48,7 +57,6 @@ import java.util.Optional; import java.util.Properties; import java.util.concurrent.TimeUnit; -import org.apache.commons.cli.MissingArgumentException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -154,15 +162,15 @@ static StatefulServiceProvider generateService( .put(MAX_READ_CAPACITY, DEFAULT_PER_ROUTER_READ_QUOTA) 
.put(SYSTEM_SCHEMA_CLUSTER_NAME, clusterName) .put(ROUTER_STORAGE_NODE_CLIENT_TYPE, StorageNodeClientType.APACHE_HTTP_ASYNC_CLIENT.name()) - .put("otel.venice.enabled", Boolean.TRUE.toString()) - .put("otel.venice.export.to.log", Boolean.TRUE.toString()) - .put("otel.venice.export.to.endpoint", Boolean.TRUE.toString()) - .put("otel.exporter.otlp.metrics.protocol", "http/protobuf") - .put("otel.exporter.otlp.metrics.endpoint", "http://localhost:4318/v1/metrics") - .put("otel.exporter.otlp.metrics.temporality.preference", "delta") - .put("otel.exporter.otlp.metrics.default.histogram.aggregation", "base2_exponential_bucket_histogram") - .put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale", 3) - .put("otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets", 250) + .put(OTEL_VENICE_ENABLED, Boolean.TRUE.toString()) + .put(OTEL_VENICE_EXPORT_TO_LOG, Boolean.TRUE.toString()) + .put(OTEL_VENICE_EXPORT_TO_ENDPOINT, Boolean.TRUE.toString()) + .put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, "http/protobuf") + .put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost:4318/v1/metrics") + .put(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE, "delta") + .put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "base2_exponential_bucket_histogram") + .put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE, 3) + .put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS, 250) .put(properties); // setup d2 config first @@ -238,7 +246,7 @@ protected void internalStop() throws Exception { } @Override - protected void newProcess() throws MissingArgumentException { + protected void newProcess() { String httpURI = "http://" + getHost() + ":" + getPort(); String httpsURI = "https://" + getHost() + ":" + getSslPort(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java 
b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java index 86a71586d3..1349a56226 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java @@ -56,7 +56,6 @@ import java.util.Optional; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; -import org.apache.commons.cli.MissingArgumentException; import org.apache.http.client.methods.HttpGet; import org.testng.Assert; import org.testng.annotations.Test; @@ -65,7 +64,7 @@ //TODO: refactor Dispatcher to take a HttpClient Factory, so we don't need to spin up an HTTP server for these tests public class TestVeniceDispatcher { @Test - public void testErrorRetry() throws MissingArgumentException { + public void testErrorRetry() { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -94,7 +93,7 @@ public void testErrorRetry() throws MissingArgumentException { } @Test - public void testErrorRetryOnPendingCheckFail() throws MissingArgumentException { + public void testErrorRetryOnPendingCheckFail() { VeniceDispatcher dispatcher = getMockDispatcher(true, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -124,7 +123,7 @@ public void testErrorRetryOnPendingCheckFail() throws MissingArgumentException { } @Test - public void testErrorRetryOnPendingCheckLeak() throws MissingArgumentException { + public void testErrorRetryOnPendingCheckLeak() { VeniceDispatcher dispatcher = getMockDispatcher(false, true); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -154,7 +153,7 @@ public void testErrorRetryOnPendingCheckLeak() throws MissingArgumentException { } @Test - public void passesThroughHttp429() throws MissingArgumentException { + public void 
passesThroughHttp429() { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -183,7 +182,7 @@ public void passesThroughHttp429() throws MissingArgumentException { } @Test - public void passThroughCompressedDataIfClientSupportsDecompressionForSingleGet() throws MissingArgumentException { + public void passThroughCompressedDataIfClientSupportsDecompressionForSingleGet() { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -216,7 +215,7 @@ public void passThroughCompressedDataIfClientSupportsDecompressionForSingleGet() } @Test - public void decompressRecordIfClientDoesntSupportsDecompressionForSingleGet() throws MissingArgumentException { + public void decompressRecordIfClientDoesntSupportsDecompressionForSingleGet() { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -248,7 +247,7 @@ public void decompressRecordIfClientDoesntSupportsDecompressionForSingleGet() th } @Test - public void passThroughCompressedDataIfClientSupportsDecompressionForMultiGet() throws MissingArgumentException { + public void passThroughCompressedDataIfClientSupportsDecompressionForMultiGet() { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -280,7 +279,7 @@ public void passThroughCompressedDataIfClientSupportsDecompressionForMultiGet() } @Test - public void decompressRecordIfClientDoesntSupportsDecompressionForMultiGet() throws MissingArgumentException { + public void decompressRecordIfClientDoesntSupportsDecompressionForMultiGet() { VeniceDispatcher dispatcher = getMockDispatcher(false, false); try { AsyncPromise> mockResponseFuture = mock(AsyncPromise.class); @@ -311,8 +310,7 @@ public void decompressRecordIfClientDoesntSupportsDecompressionForMultiGet() thr } } - 
private VeniceDispatcher getMockDispatcher(boolean forcePendingCheck, boolean forceLeakPending) - throws MissingArgumentException { + private VeniceDispatcher getMockDispatcher(boolean forcePendingCheck, boolean forceLeakPending) { VeniceRouterConfig routerConfig = mock(VeniceRouterConfig.class); doReturn(2).when(routerConfig).getHttpClientPoolSize(); doReturn(10).when(routerConfig).getMaxOutgoingConn(); diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java index 56eaa9e3ee..de07ad910c 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java @@ -37,7 +37,7 @@ protected AggPartitionHealthStats( String clusterName, ReadOnlyStoreRepository storeRepository, PushMonitor pushMonitor) { - super(clusterName, null, (metricRepo, resourceName) -> new PartitionHealthStats(resourceName)); + super(null, (metricRepo, resourceName, cluster) -> new PartitionHealthStats(resourceName), clusterName); this.storeRepository = storeRepository; this.pushMonitor = pushMonitor; } @@ -48,7 +48,7 @@ public AggPartitionHealthStats( RoutingDataRepository routingDataRepository, ReadOnlyStoreRepository storeRepository, PushMonitor pushMonitor) { - super(clusterName, metricsRepository, PartitionHealthStats::new); + super(metricsRepository, PartitionHealthStats::new, clusterName); this.storeRepository = storeRepository; this.pushMonitor = pushMonitor; diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/PartitionHealthStats.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/PartitionHealthStats.java index 7d269238bc..4938ec8bea 100644 --- 
a/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/PartitionHealthStats.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/PartitionHealthStats.java @@ -22,7 +22,7 @@ public PartitionHealthStats(String resourceName) { super(null, resourceName); } - public PartitionHealthStats(MetricsRepository metricsRepository, String name) { + public PartitionHealthStats(MetricsRepository metricsRepository, String name, String clusterName) { super(metricsRepository, name); synchronized (PartitionHealthStats.class) { Sensor existingMetric = metricsRepository.getSensor(getSensorFullName(UNDER_REPLICATED_PARTITION_SENSOR)); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index 15e9d33bbe..35d6c68bb7 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -124,7 +124,6 @@ import java.util.function.Consumer; import java.util.function.LongSupplier; import javax.annotation.Nonnull; -import org.apache.commons.cli.MissingArgumentException; import org.apache.helix.InstanceType; import org.apache.helix.manager.zk.ZKHelixManager; import org.apache.helix.zookeeper.impl.client.ZkClient; @@ -270,7 +269,7 @@ public RouterServer( VeniceProperties properties, List serviceDiscoveryAnnouncers, Optional accessController, - Optional sslFactory) throws MissingArgumentException { + Optional sslFactory) { this( properties, serviceDiscoveryAnnouncers, @@ -303,28 +302,6 @@ public RouterServer( DEFAULT_CLUSTER_DISCOVERY_D2_SERVICE_NAME); } - public RouterServer( - VeniceProperties properties, - List serviceDiscoveryAnnouncers, - Optional accessController, - Optional sslFactory, - MetricsRepository metricsRepository, - D2Client d2Client, - String d2ServiceName) throws 
MissingArgumentException { - this( - properties, - serviceDiscoveryAnnouncers, - accessController, - sslFactory, - new VeniceMetricsRepository( - metricsRepository, - new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) - .extractAndSetOtelConfigs(properties.getAsMap()) - .build()), - d2Client, - d2ServiceName); - } - public RouterServer( VeniceProperties properties, List serviceDiscoveryAnnouncers, @@ -436,7 +413,7 @@ public RouterServer( HelixReadOnlyStoreConfigRepository storeConfigRepository, List serviceDiscoveryAnnouncers, Optional sslFactory, - HelixLiveInstanceMonitor liveInstanceMonitor) throws MissingArgumentException { + HelixLiveInstanceMonitor liveInstanceMonitor) { this( properties, serviceDiscoveryAnnouncers, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java index 61506493cd..230f12f005 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java @@ -8,8 +8,10 @@ public class AggHostHealthStats extends AbstractVeniceAggStats { public AggHostHealthStats(VeniceMetricsRepository metricsRepository, String clusterName) { super( - (repo, hostName, cluster) -> new HostHealthStats(repo, StatsUtils.convertHostnameToMetricName(hostName)), metricsRepository, + (repo, hostName, cluster) -> new HostHealthStats( + (VeniceMetricsRepository) repo, + StatsUtils.convertHostnameToMetricName(hostName)), clusterName); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java index e39b072a19..38d1321a32 100644 --- 
a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java @@ -51,7 +51,7 @@ public AggRouterHttpRequestStats( } return new RouterHttpRequestStats( - metricsRepo, + (VeniceMetricsRepository) metricsRepo, storeName, clusterName, requestType, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index d5753de7f2..504d29f8ba 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -46,24 +46,15 @@ import io.tehuti.metrics.stats.Total; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.commons.cli.MissingArgumentException; public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private static final MetricConfig METRIC_CONFIG = new MetricConfig().timeWindow(10, TimeUnit.SECONDS); - private static final VeniceMetricsRepository localMetricRepo; - - static { - try { - localMetricRepo = new VeniceMetricsRepository( - new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) - .setMetricPrefix(ROUTER_SERVICE_METRIC_PREFIX) - .setTehutiMetricConfig(METRIC_CONFIG) - .build()); - } catch (MissingArgumentException e) { - throw new RuntimeException(e); - } - } + private static final VeniceMetricsRepository localMetricRepo = new VeniceMetricsRepository( + new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) + .setMetricPrefix(ROUTER_SERVICE_METRIC_PREFIX) + .setTehutiMetricConfig(METRIC_CONFIG) + .build()); private final static Sensor totalInflightRequestSensor = localMetricRepo.sensor("total_inflight_request"); static { 
diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java index 0551ed7b6b..2af917f1bd 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java @@ -7,7 +7,6 @@ import com.linkedin.venice.router.stats.AggRouterHttpRequestStats; import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; -import org.apache.commons.cli.MissingArgumentException; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.BeforeSuite; @@ -20,7 +19,7 @@ public class AggRouterHttpRequestStatsTest { private ReadOnlyStoreRepository storeMetadataRepository; @BeforeSuite - public void setUp() throws MissingArgumentException { + public void setUp() { this.metricsRepository = new VeniceMetricsRepository(); reporter = new MockTehutiReporter(); metricsRepository.addReporter(reporter); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java index 86e42b7dd8..fdfa96bb08 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java @@ -10,7 +10,6 @@ import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; -import org.apache.commons.cli.MissingArgumentException; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; @@ -22,7 +21,7 @@ public class 
RouteHttpRequestStatsTest { private RouterHttpRequestStats routerHttpRequestStats; @BeforeSuite - public void setUp() throws MissingArgumentException { + public void setUp() { VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); reporter = new MockTehutiReporter(); metrics.addReporter(reporter); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java index 55b60a648d..01377fc91b 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java @@ -53,7 +53,6 @@ import java.util.Map; import java.util.Set; import javax.annotation.Nonnull; -import org.apache.commons.cli.MissingArgumentException; import org.apache.http.client.methods.HttpUriRequest; import org.mockito.ArgumentCaptor; import org.testng.Assert; @@ -223,18 +222,14 @@ private VenicePathParser getPathParser() { @BeforeClass public void setUp() { - RouterExceptionAndTrackingUtils.setRouterStats(new RouterStats<>(requestType -> { - try { - return new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - "test-cluster", - requestType, - mock(ReadOnlyStoreRepository.class), - true); - } catch (MissingArgumentException e) { - throw new RuntimeException(e); - } - })); + RouterExceptionAndTrackingUtils.setRouterStats( + new RouterStats<>( + requestType -> new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + "test-cluster", + requestType, + mock(ReadOnlyStoreRepository.class), + true))); } @AfterClass @@ -380,18 +375,16 @@ public void testLeastLoadedOnSlowHosts() throws RouterException { VeniceRouterConfig config = mock(VeniceRouterConfig.class); doReturn(LEAST_LOADED_ROUTING).when(config).getMultiKeyRoutingStrategy(); - VeniceDelegateMode scatterMode = 
new VeniceDelegateMode(config, new RouterStats<>(requestType -> { - try { - return new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - "test-cluster", - requestType, - mock(ReadOnlyStoreRepository.class), - true); - } catch (MissingArgumentException e) { - throw new RuntimeException(e); - } - }), mock(RouteHttpRequestStats.class)); + VeniceDelegateMode scatterMode = new VeniceDelegateMode( + config, + new RouterStats<>( + requestType -> new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + "test-cluster", + requestType, + mock(ReadOnlyStoreRepository.class), + true)), + mock(RouteHttpRequestStats.class)); scatterMode.initReadRequestThrottler(throttler); Scatter finalScatter = scatterMode @@ -620,7 +613,7 @@ public void testScatterWithStreamingMultiGet() throws RouterException { } @Test - public void testScatterForMultiGetWithHelixAssistedRouting() throws RouterException, MissingArgumentException { + public void testScatterForMultiGetWithHelixAssistedRouting() throws RouterException { String storeName = Utils.getUniqueString("test_store"); int version = 1; String resourceName = storeName + "_v" + version; diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java index 2ef0d0c291..2ab4e77908 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java @@ -52,7 +52,6 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.ScheduledExecutorService; -import org.apache.commons.cli.MissingArgumentException; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.AfterClass; @@ -98,18 +97,14 @@ RouterStats getMockedStats() { @BeforeClass public void setUp() { - RouterExceptionAndTrackingUtils.setRouterStats(new 
RouterStats<>(requestType -> { - try { - return new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - CLUSTER, - requestType, - mock(ReadOnlyStoreRepository.class), - true); - } catch (MissingArgumentException e) { - throw new RuntimeException(e); - } - })); + RouterExceptionAndTrackingUtils.setRouterStats( + new RouterStats<>( + requestType -> new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + CLUSTER, + requestType, + mock(ReadOnlyStoreRepository.class), + true))); } @AfterClass diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java index 464b160010..02e6fb1bdb 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java @@ -29,7 +29,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; -import org.apache.commons.cli.MissingArgumentException; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -40,23 +39,19 @@ public class TestVeniceMultiGetPath { private final RetryManager disabledRetryManager = new RetryManager(new VeniceMetricsRepository(), "disabled-test-retry-manager", 0, 0, null); - public TestVeniceMultiGetPath() throws MissingArgumentException { + public TestVeniceMultiGetPath() { } @BeforeClass public void setUp() { - RouterExceptionAndTrackingUtils.setRouterStats(new RouterStats<>(requestType -> { - try { - return new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), - "test-cluster", - requestType, - mock(ReadOnlyStoreRepository.class), - true); - } catch (MissingArgumentException e) { - throw new RuntimeException(e); - } - })); + RouterExceptionAndTrackingUtils.setRouterStats( + new RouterStats<>( + 
requestType -> new AggRouterHttpRequestStats( + new VeniceMetricsRepository(), + "test-cluster", + requestType, + mock(ReadOnlyStoreRepository.class), + true))); } @AfterClass diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java index 04800c3ac3..eef224113d 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java @@ -21,7 +21,6 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; -import org.apache.commons.cli.MissingArgumentException; import org.apache.http.client.methods.HttpUriRequest; import org.testng.Assert; import org.testng.annotations.AfterClass; @@ -89,7 +88,7 @@ public String getLocation() { private final ScheduledExecutorService retryManagerScheduler = Executors.newScheduledThreadPool(1); @BeforeMethod - public void setUp() throws MissingArgumentException { + public void setUp() { metricsRepository = new VeniceMetricsRepository(); // retry manager is disabled by default disabledRetryManager = diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java index 2672eba9fb..35364da6b1 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java @@ -6,14 +6,13 @@ import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; -import 
org.apache.commons.cli.MissingArgumentException; import org.testng.Assert; import org.testng.annotations.Test; public class AdminOperationsStatsTest { @Test - public void testAdminOperationsStats() throws MissingArgumentException { + public void testAdminOperationsStats() { VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); MockTehutiReporter reporter = new MockTehutiReporter(); metrics.addReporter(reporter); diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java index e9b504a3d4..e2187ac785 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java @@ -9,7 +9,7 @@ */ public class AggRocksDBStats extends AbstractVeniceAggStats { public AggRocksDBStats(MetricsRepository metricsRepository, Statistics aggStat) { - super(metricsRepository, (metricsRepo, storeName) -> new RocksDBStats(metricsRepository, storeName)); + super(metricsRepository, (metricsRepo, storeName, clusterName) -> new RocksDBStats(metricsRepository, storeName)); totalStats.setRocksDBStat(aggStat); } } diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java index 1fe0116fc9..c11ba2c2fd 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java @@ -25,7 +25,7 @@ public AggServerHttpRequestStats( unregisterMetricForDeletedStoreEnabled); } - static class ServerHttpRequestStatsSupplier implements StatsSupplierMetricsRepository { + static class ServerHttpRequestStatsSupplier implements StatsSupplier { private final 
RequestType requestType; private final boolean isKeyValueProfilingEnabled; @@ -41,7 +41,7 @@ static class ServerHttpRequestStatsSupplier implements StatsSupplierMetricsRepos } @Override - public ServerHttpRequestStats get(MetricsRepository metricsRepository, String storeName) { + public ServerHttpRequestStats get(MetricsRepository metricsRepository, String storeName, String clusterName) { throw new VeniceException("Should not be called."); } @@ -49,6 +49,7 @@ public ServerHttpRequestStats get(MetricsRepository metricsRepository, String st public ServerHttpRequestStats get( MetricsRepository metricsRepository, String storeName, + String clusterName, ServerHttpRequestStats totalStats) { return new ServerHttpRequestStats( metricsRepository, diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java index cf47bb7f1d..568dc555fa 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java @@ -11,7 +11,7 @@ public class AggServerQuotaUsageStats extends AbstractVeniceAggStats new ServerReadQuotaUsageStats(metrics, storeName)); + super(metricsRepository, (metrics, storeName, clusterName) -> new ServerReadQuotaUsageStats(metrics, storeName)); totalStats.setCurrentVersion(SINGLE_VERSION_FOR_TOTAL_STATS); } From 63b27bee5eb7dd71458ec5b492df7ffb897e8e80 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Tue, 19 Nov 2024 05:13:48 -0800 Subject: [PATCH 04/19] address review comments --- .../linkedin/venice/stats/MetricEntity.java | 70 ++++++++++++++ .../VeniceOpenTelemetryMetricsRepository.java | 16 ++++ .../VeniceHttpResponseStatusCodeCategory.java | 0 .../dimensions/VeniceMetricsDimensions.java | 0 .../VeniceRequestRetryAbortReason.java | 0 .../dimensions/VeniceRequestRetryType.java | 0 
.../VeniceRequestValidationOutcome.java | 0 .../VeniceResponseStatusCategory.java | 0 ...iceHttpResponseStatusCodeCategoryTest.java | 0 .../VeniceMetricsDimensionsTest.java | 0 .../VeniceRequestRetryAbortReasonTest.java | 0 .../VeniceRequestRetryTypeTest.java | 0 .../VeniceRequestValidationOutcomeTest.java | 0 .../VeniceResponseStatusCategoryTest.java | 0 services/venice-router/build.gradle | 3 - .../router/stats/RouterHttpRequestStats.java | 44 +++++---- .../router/stats/RouterMetricEntities.java | 95 +++++++++++++++++++ 17 files changed, 207 insertions(+), 21 deletions(-) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java rename internal/{venice-common => venice-client-common}/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java (100%) rename internal/{venice-common => venice-client-common}/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java (100%) rename internal/{venice-common => venice-client-common}/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java (100%) rename internal/{venice-common => venice-client-common}/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java (100%) rename internal/{venice-common => venice-client-common}/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java (100%) rename internal/{venice-common => venice-client-common}/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java (100%) rename internal/{venice-common => venice-client-common}/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java (100%) rename internal/{venice-common => venice-client-common}/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java (100%) rename internal/{venice-common => 
venice-client-common}/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java (100%) rename internal/{venice-common => venice-client-common}/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java (100%) rename internal/{venice-common => venice-client-common}/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java (100%) rename internal/{venice-common => venice-client-common}/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java (100%) create mode 100644 services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java new file mode 100644 index 0000000000..b3655bf8f8 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java @@ -0,0 +1,70 @@ +package com.linkedin.venice.stats; + +import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.apache.commons.lang.Validate; + + +/** + * Metric entity class to define a metric with all its properties + */ +public class MetricEntity { + public enum MetricType { + HISTOGRAM, HISTOGRAM_WITHOUT_BUCKETS, COUNTER + }; + + private final String metricName; + private final MetricType metricType; + private final String unit; + private final String description; + private final Set dimensionsList; + + public MetricEntity( + @Nonnull String metricName, + @Nonnull MetricType metricType, + @Nonnull String unit, + @Nonnull String description) { + this(metricName, metricType, unit, description, null); + } + + public MetricEntity( + @Nonnull String metricName, + @Nonnull MetricType metricType, + @Nonnull String unit, + @Nonnull String 
description, + @Nullable Set dimensionsList) { + Validate.notEmpty(metricName, "Metric name cannot be null or empty"); + this.metricName = metricName; + this.metricType = metricType; + this.unit = unit; + this.description = description; + this.dimensionsList = dimensionsList; + } + + @Nonnull + public String getMetricName() { + return metricName; + } + + @Nonnull + public MetricType getMetricType() { + return metricType; + } + + @Nonnull + public String getUnit() { + return unit; + } + + @Nonnull + public String getDescription() { + return description; + } + + @Nullable + public Set getDimensionsList() { + return dimensionsList; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 478c3738bf..b43265bb4f 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -154,6 +154,22 @@ public LongCounter getCounter(String name, String unit, String description) { }); } + public Object getInstrument(MetricEntity metricEntity) { + switch (metricEntity.getMetricType()) { + case HISTOGRAM: + return getHistogram(metricEntity.getMetricName(), metricEntity.getUnit(), metricEntity.getDescription()); + case HISTOGRAM_WITHOUT_BUCKETS: + return getHistogramWithoutBuckets( + metricEntity.getMetricName(), + metricEntity.getUnit(), + metricEntity.getDescription()); + case COUNTER: + return getCounter(metricEntity.getMetricName(), metricEntity.getUnit(), metricEntity.getDescription()); + default: + throw new VeniceException("Unknown metric type: " + metricEntity.getMetricType()); + } + } + public void close() { LOGGER.info("OpenTelemetry close"); if (sdkMeterProvider != null) { diff --git 
a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java similarity index 100% rename from internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java similarity index 100% rename from internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java similarity index 100% rename from internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java similarity index 100% rename from internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java rename to 
internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java similarity index 100% rename from internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java similarity index 100% rename from internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java similarity index 100% rename from internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java similarity index 100% rename from 
internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java similarity index 100% rename from internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java similarity index 100% rename from internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java similarity index 100% rename from internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java 
b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java similarity index 100% rename from internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java diff --git a/services/venice-router/build.gradle b/services/venice-router/build.gradle index e1bf3cef4e..fac67a86eb 100644 --- a/services/venice-router/build.gradle +++ b/services/venice-router/build.gradle @@ -65,9 +65,6 @@ dependencies { implementation project(':internal:alpini:router:alpini-router-api') implementation project(':internal:alpini:router:alpini-router-impl') implementation libraries.opentelemetryApi - implementation libraries.opentelemetrySdk - implementation libraries.opentelemetryExporterLogging - implementation libraries.opentelemetryExporterOtlp testImplementation project(':clients:venice-thin-client') testImplementation libraries.kafkaClientsTest // TODO: Get rid of Kafka dependency in venice-common (used by TopicCreator) diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 504d29f8ba..347ff39b18 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -2,6 +2,15 @@ import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_METRIC_PREFIX; import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_NAME; +import static com.linkedin.venice.router.stats.RouterMetricEntities.ABORTED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.ALLOWED_RETRY_COUNT; +import static 
com.linkedin.venice.router.stats.RouterMetricEntities.CALL_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_KEY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_TIME; +import static com.linkedin.venice.router.stats.RouterMetricEntities.DISALLOWED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.INCOMING_CALL_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_DELAY; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; @@ -159,8 +168,8 @@ public RouterHttpRequestStats( Rate tardyRequestRate = new OccurrenceRate(); incomingRequestSensor = registerSensor("request", new Count(), requestRate); - incomingRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getCounter("incoming_call_count", "Number", "Count of all incoming requests"); + incomingRequestSensorOtel = + (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(INCOMING_CALL_COUNT); healthySensor = registerSensor("healthy_request", new Count(), healthyRequestRate); unhealthySensor = registerSensor("unhealthy_request", new Count()); @@ -171,30 +180,29 @@ public RouterHttpRequestStats( tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); badRequestSensor = registerSensor("bad_request", new Count()); - requestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getCounter("call_count", "Number", "Count of all requests with response details"); + requestSensorOtel = (LongCounter) 
metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_COUNT); errorRetryCountSensor = registerSensor("error_retry", new Count()); - retryTriggeredSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getCounter("retry_call_count", "Number", "Count of retries triggered"); + retryTriggeredSensorOtel = + (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(RETRY_COUNT); allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); - allowedRetryRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getCounter("allowed_retry_call_count", "Number", "Count of allowed retry requests"); + allowedRetryRequestSensorOtel = + (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(ALLOWED_RETRY_COUNT); disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); - disallowedRetryRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getCounter("disallowed_retry_call_count", "Number", "Count of disallowed retry requests"); + disallowedRetryRequestSensorOtel = + (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(DISALLOWED_RETRY_COUNT); errorRetryAttemptTriggeredByPendingRequestCheckSensor = registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); - retryDelaySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getHistogramWithoutBuckets("retry_delay", TimeUnit.MILLISECONDS.name(), "Retry delay time"); + retryDelaySensorOtel = + (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(RETRY_DELAY); delayConstraintAbortedRetryRequest = registerSensor("delay_constraint_aborted_retry_request", new Count()); slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", 
new Count()); retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); - abortedRetrySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getCounter("aborted_retry_call_count", "Number", "Count of aborted retry requests"); + abortedRetrySensorOtel = + (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(ABORTED_RETRY_COUNT); unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); requestThrottledByRouterCapacitySensor = registerSensor("request_throttled_by_router_capacity", new Count()); @@ -206,8 +214,8 @@ public RouterHttpRequestStats( unhealthyRequestLatencySensor = registerSensor("unhealthy_request_latency", new Avg(), new Max(0)); tardyRequestLatencySensor = registerSensor("tardy_request_latency", new Avg(), new Max(0)); throttledRequestLatencySensor = registerSensor("throttled_request_latency", new Avg(), new Max(0)); - latencySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getHistogram("call_time", TimeUnit.MILLISECONDS.name(), "Latency based on all responses"); + latencySensorOtel = + (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_TIME); routerResponseWaitingTimeSensor = registerSensor( "response_waiting_time", @@ -244,8 +252,8 @@ public RouterHttpRequestStats( keyNumSensor = registerSensor("key_num", new Avg(), new Max(0)); badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); - keyCountSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() - .getHistogramWithoutBuckets("call_key_count", "Number", "Count of keys in multi key requests"); + keyCountSensorOtel = + (DoubleHistogram) 
metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_KEY_COUNT); /** * request_usage.Total is incoming KPS while request_usage.OccurrenceRate is QPS diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java new file mode 100644 index 0000000000..05e88e9714 --- /dev/null +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java @@ -0,0 +1,95 @@ +package com.linkedin.venice.router.stats; + +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.utils.Utils.setOf; + +import com.linkedin.venice.stats.MetricEntity; +import java.util.concurrent.TimeUnit; + + +/** + * List all Metric entities for router + */ +public class RouterMetricEntities { + public static final MetricEntity INCOMING_CALL_COUNT = new MetricEntity( + "incoming_call_count", + MetricEntity.MetricType.COUNTER, + "Number", + "Count of all incoming requests", + setOf(VENICE_STORE_NAME, 
VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); + + public static final MetricEntity CALL_COUNT = new MetricEntity( + "call_count", + MetricEntity.MetricType.COUNTER, + "Number", + "Count of all requests with response details", + setOf( + VENICE_STORE_NAME, + VENICE_CLUSTER_NAME, + VENICE_REQUEST_METHOD, + HTTP_RESPONSE_STATUS_CODE, + HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VENICE_RESPONSE_STATUS_CODE_CATEGORY)); + + public static final MetricEntity CALL_TIME = new MetricEntity( + "call_time", + MetricEntity.MetricType.HISTOGRAM, + TimeUnit.MILLISECONDS.name(), + "Latency based on all responses", + setOf( + VENICE_STORE_NAME, + VENICE_CLUSTER_NAME, + VENICE_REQUEST_METHOD, + HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VENICE_RESPONSE_STATUS_CODE_CATEGORY)); + + public static final MetricEntity CALL_KEY_COUNT = new MetricEntity( + "call_key_count", + MetricEntity.MetricType.HISTOGRAM_WITHOUT_BUCKETS, + "Number", + "Count of keys in multi key requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_VALIDATION_OUTCOME)); + + public static final MetricEntity RETRY_COUNT = new MetricEntity( + "retry_count", + MetricEntity.MetricType.COUNTER, + "Number", + "Count of retries triggered", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_TYPE)); + + public static final MetricEntity ALLOWED_RETRY_COUNT = new MetricEntity( + "allowed_retry_count", + MetricEntity.MetricType.COUNTER, + "Number", + "Count of allowed retry requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); + + public static final MetricEntity DISALLOWED_RETRY_COUNT = new MetricEntity( + "disallowed_retry_count", + MetricEntity.MetricType.COUNTER, + "Number", + "Count of disallowed retry requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); + + public static final MetricEntity RETRY_DELAY = new MetricEntity( + "retry_delay", + MetricEntity.MetricType.HISTOGRAM_WITHOUT_BUCKETS, + 
TimeUnit.MILLISECONDS.name(), + "Retry delay time", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); + + public static final MetricEntity ABORTED_RETRY_COUNT = new MetricEntity( + "aborted_retry_count", + MetricEntity.MetricType.COUNTER, + "Number", + "Count of aborted retry requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_ABORT_REASON)); +} From 6197b842fff310c9d1917f365aebd4cd68250e96 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Tue, 19 Nov 2024 12:25:01 -0800 Subject: [PATCH 05/19] simplify the getHistogram and getCounter methods and add javadoc for configs --- .../venice/stats/VeniceMetricsConfig.java | 53 ++++++++++++++++++- .../venice/stats/VeniceMetricsRepository.java | 5 +- .../VeniceOpenTelemetryMetricsRepository.java | 44 ++++++--------- ...iceOpenTelemetryMetricsRepositoryTest.java | 18 ++++--- 4 files changed, 85 insertions(+), 35 deletions(-) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index 4c2ca5dab5..f52d62bb42 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -16,22 +16,73 @@ public class VeniceMetricsConfig { private static final Logger LOGGER = LogManager.getLogger(VeniceMetricsConfig.class); - // create constants for all the configs + + /** + * Config to enable OpenTelemetry metrics + */ public static final String OTEL_VENICE_ENABLED = "otel.venice.enabled"; + + /** + * Config to set the naming format for OpenTelemetry metrics + * {@link VeniceOpenTelemetryMetricNamingFormat} + */ public static final String OTEL_VENICE_METRICS_NAMING_FORMAT = "otel.venice.metrics.naming.format"; + + /** + * Export opentelemetry metrics to a log exporter + * 
{@link VeniceOpenTelemetryMetricsRepository.LogBasedMetricExporter} + */ public static final String OTEL_VENICE_EXPORT_TO_LOG = "otel.venice.export.to.log"; + + /** + * Export opentelemetry metrics to {@link #OTEL_EXPORTER_OTLP_METRICS_ENDPOINT} + * over {@link #OTEL_EXPORTER_OTLP_METRICS_PROTOCOL} + */ public static final String OTEL_VENICE_EXPORT_TO_ENDPOINT = "otel.venice.export.to.endpoint"; + + /** + * Protocol over which the metrics are exported to {@link #OTEL_EXPORTER_OTLP_METRICS_ENDPOINT}
+ * 1. {@link OtlpConfigUtil#PROTOCOL_HTTP_PROTOBUF} => "http/protobuf"
+ * 2. {@link OtlpConfigUtil#PROTOCOL_GRPC} => "grpc" + */ public static final String OTEL_EXPORTER_OTLP_METRICS_PROTOCOL = "otel.exporter.otlp.metrics.protocol"; + + /** + * The Endpoint to which the metrics are exported + */ public static final String OTEL_EXPORTER_OTLP_METRICS_ENDPOINT = "otel.exporter.otlp.metrics.endpoint"; + + /** + * Additional headers to pass while creating OpenTelemetry exporter + */ public static final String OTEL_EXPORTER_OTLP_METRICS_HEADERS = "otel.exporter.otlp.metrics.headers"; + + /** + * Aggregation Temporality selector to export only the delta, cumulative, or a different temporality + */ public static final String OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = "otel.exporter.otlp.metrics.temporality.preference"; + + /** + * Default histogram aggregation to be used for all histograms: Select one of the below
+ * 1. base2_exponential_bucket_histogram
+ * 2. explicit_bucket_histogram + */ public static final String OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION = "otel.exporter.otlp.metrics.default.histogram.aggregation"; + + /** + * Max scale for base2_exponential_bucket_histogram + */ public static final String OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE = "otel.exporter.otlp.metrics.default.histogram.aggregation.max.scale"; + + /** + * Max buckets for base2_exponential_bucket_histogram + */ public static final String OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS = "otel.exporter.otlp.metrics.default.histogram.aggregation.max.buckets"; + private final String serviceName; private final String metricPrefix; /** reusing tehuti's MetricConfig */ diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index 3d94be4e49..9f6bd02b06 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -4,7 +4,10 @@ import java.io.Closeable; -/** extends MetricsRepository to keep the changes to a minimum. Next step would be to create a MetricsRepository inside rather than extending it */ +/** + * extends {@link MetricsRepository} to keep the changes to a minimum. 
+ * Next step would be to create a MetricsRepository inside rather than extending it + */ public class VeniceMetricsRepository extends MetricsRepository implements Closeable { private VeniceMetricsConfig veniceMetricsConfig; VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index b43265bb4f..2c77b88bec 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -118,38 +118,31 @@ private String getMetricPrefix() { return metricPrefix; } - public DoubleHistogram getHistogram(String name, String unit, String description) { + public DoubleHistogram getHistogram(MetricEntity metricEntity) { if (!emitOpenTelemetryMetrics) { return null; } - return histogramMap.computeIfAbsent(name, key -> { - String fullMetricName = getFullMetricName(getMetricPrefix(), name); - DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName).setUnit(unit).setDescription(description); - return builder.build(); - }); - } - - public DoubleHistogram getHistogramWithoutBuckets(String name, String unit, String description) { - if (!emitOpenTelemetryMetrics) { - return null; - } - return histogramMap.computeIfAbsent(name, key -> { - String fullMetricName = getFullMetricName(getMetricPrefix(), name); + return histogramMap.computeIfAbsent(metricEntity.getMetricName(), key -> { + String fullMetricName = getFullMetricName(getMetricPrefix(), metricEntity.getMetricName()); DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) - .setExplicitBucketBoundariesAdvice(new ArrayList<>()) - .setUnit(unit) - .setDescription(description); + 
.setUnit(metricEntity.getUnit()) + .setDescription(metricEntity.getDescription()); + if (metricEntity.getMetricType() == MetricEntity.MetricType.HISTOGRAM_WITHOUT_BUCKETS) { + builder.setExplicitBucketBoundariesAdvice(new ArrayList<>()); + } return builder.build(); }); } - public LongCounter getCounter(String name, String unit, String description) { + public LongCounter getCounter(MetricEntity metricEntity) { if (!emitOpenTelemetryMetrics) { return null; } - return counterMap.computeIfAbsent(name, key -> { - String fullMetricName = getFullMetricName(getMetricPrefix(), name); - LongCounterBuilder builder = meter.counterBuilder(fullMetricName).setUnit(unit).setDescription(description); + return counterMap.computeIfAbsent(metricEntity.getMetricName(), key -> { + String fullMetricName = getFullMetricName(getMetricPrefix(), metricEntity.getMetricName()); + LongCounterBuilder builder = meter.counterBuilder(fullMetricName) + .setUnit(metricEntity.getUnit()) + .setDescription(metricEntity.getDescription()); return builder.build(); }); } @@ -157,14 +150,11 @@ public LongCounter getCounter(String name, String unit, String description) { public Object getInstrument(MetricEntity metricEntity) { switch (metricEntity.getMetricType()) { case HISTOGRAM: - return getHistogram(metricEntity.getMetricName(), metricEntity.getUnit(), metricEntity.getDescription()); case HISTOGRAM_WITHOUT_BUCKETS: - return getHistogramWithoutBuckets( - metricEntity.getMetricName(), - metricEntity.getUnit(), - metricEntity.getDescription()); + return getHistogram(metricEntity); + case COUNTER: - return getCounter(metricEntity.getMetricName(), metricEntity.getUnit(), metricEntity.getDescription()); + return getCounter(metricEntity); default: throw new VeniceException("Unknown metric type: " + metricEntity.getMetricType()); } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java 
b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java index f9ec91df5e..5e2fdf1301 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -58,8 +58,10 @@ public void testConstructorWithEmitDisabled() { // Verify that metrics-related fields are null when metrics are disabled assertNull(metricsRepository.getSdkMeterProvider()); assertNull(metricsRepository.getMeter()); - assertNull(metricsRepository.getHistogram("test", "unit", "desc")); - assertNull(metricsRepository.getCounter("test", "unit", "desc")); + assertNull( + metricsRepository.getInstrument(new MetricEntity("test", MetricEntity.MetricType.HISTOGRAM, "unit", "desc"))); + assertNull( + metricsRepository.getInstrument(new MetricEntity("test", MetricEntity.MetricType.COUNTER, "unit", "desc"))); } @Test @@ -104,8 +106,10 @@ public void testTransformMetricName() { @Test public void testCreateTwoHistograms() { - DoubleHistogram histogram1 = metricsRepository.getHistogram("test_histogram", "unit", "description"); - DoubleHistogram histogram2 = metricsRepository.getHistogram("test_histogram", "unit", "description"); + DoubleHistogram histogram1 = (DoubleHistogram) metricsRepository + .getInstrument(new MetricEntity("test_histogram", MetricEntity.MetricType.HISTOGRAM, "unit", "desc")); + DoubleHistogram histogram2 = (DoubleHistogram) metricsRepository + .getInstrument(new MetricEntity("test_histogram", MetricEntity.MetricType.HISTOGRAM, "unit", "desc")); assertNotNull(histogram1); assertSame(histogram1, histogram2, "Should return the same instance for the same histogram name."); @@ -113,8 +117,10 @@ public void testCreateTwoHistograms() { @Test public void testCreateTwoCounters() { - LongCounter counter1 = metricsRepository.getCounter("test_counter", "unit", 
"description"); - LongCounter counter2 = metricsRepository.getCounter("test_counter", "unit", "description"); + LongCounter counter1 = (LongCounter) metricsRepository + .getInstrument(new MetricEntity("test_counter", MetricEntity.MetricType.COUNTER, "unit", "desc")); + LongCounter counter2 = (LongCounter) metricsRepository + .getInstrument(new MetricEntity("test_counter", MetricEntity.MetricType.COUNTER, "unit", "desc")); assertNotNull(counter1); assertSame(counter1, counter2, "Should return the same instance for the same counter name."); From e425296f7a9ec6525dfa5d79ba640401b7a9f338 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Wed, 20 Nov 2024 21:22:16 -0800 Subject: [PATCH 06/19] Make histogram without buckes to be always explicit bucket and some cleanups --- .../venice/stats/VeniceMetricsConfig.java | 70 ++++++++------ ...VeniceOpenTelemetryMetricNamingFormat.java | 2 +- .../VeniceOpenTelemetryMetricsRepository.java | 78 ++++++++++++--- .../stats/metrics/AllMetricEntities.java | 20 ++++ .../venice/stats/metrics/MetricEntities.java | 9 ++ .../stats/{ => metrics}/MetricEntity.java | 14 +-- .../venice/stats/metrics/MetricType.java | 27 ++++++ .../venice/stats/metrics/MetricUnit.java | 8 ++ .../metrics/modules/RouterMetricEntities.java | 95 +++++++++++++++++++ .../venice/stats/VeniceMetricsConfigTest.java | 25 +---- ...iceOpenTelemetryMetricsRepositoryTest.java | 15 +-- .../router/stats/RouterHttpRequestStats.java | 61 ++++++------ .../router/stats/RouterMetricEntities.java | 95 ------------------- 13 files changed, 315 insertions(+), 204 deletions(-) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/{ => metrics}/MetricEntity.java (86%) create mode 100644 
internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java delete mode 100644 services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index f52d62bb42..0f73965665 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -1,10 +1,7 @@ package com.linkedin.venice.stats; import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; -import io.opentelemetry.sdk.metrics.Aggregation; -import io.opentelemetry.sdk.metrics.InstrumentType; import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; -import io.opentelemetry.sdk.metrics.export.DefaultAggregationSelector; import io.opentelemetry.sdk.metrics.export.MetricExporter; import io.tehuti.metrics.MetricConfig; import java.util.HashMap; @@ -120,8 +117,10 @@ public class VeniceMetricsConfig { /** Aggregation Temporality selector to export only the delta or cumulate or different */ private final AggregationTemporalitySelector otelAggregationTemporalitySelector; - /** Default histogram aggregation to be used for all histograms: Select exponential or explicit bucket histogram */ - private final DefaultAggregationSelector otelHistogramAggregationSelector; + /** Default histogram aggregation to be used for all histograms: exponential or explicit bucket histogram */ + private final boolean useOtelExponentialHistogram; + private final int 
otelExponentialHistogramMaxScale; + private final int otelExponentialHistogramMaxBuckets; private VeniceMetricsConfig(Builder builder) { this.serviceName = builder.serviceName; @@ -134,7 +133,9 @@ private VeniceMetricsConfig(Builder builder) { this.exportOtelMetricsToLog = builder.exportOtelMetricsToLog; this.metricNamingFormat = builder.metricNamingFormat; this.otelAggregationTemporalitySelector = builder.otelAggregationTemporalitySelector; - this.otelHistogramAggregationSelector = builder.otelHistogramAggregationSelector; + this.useOtelExponentialHistogram = builder.useOtelExponentialHistogram; + this.otelExponentialHistogramMaxScale = builder.otelExponentialHistogramMaxScale; + this.otelExponentialHistogramMaxBuckets = builder.otelExponentialHistogramMaxBuckets; this.tehutiMetricConfig = builder.tehutiMetricConfig; } @@ -150,7 +151,10 @@ public static class Builder { private VeniceOpenTelemetryMetricNamingFormat metricNamingFormat = VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; private AggregationTemporalitySelector otelAggregationTemporalitySelector = AggregationTemporalitySelector.deltaPreferred(); - DefaultAggregationSelector otelHistogramAggregationSelector = null; + private boolean useOtelExponentialHistogram = true; + private int otelExponentialHistogramMaxScale = 3; + private int otelExponentialHistogramMaxBuckets = 250; + private MetricConfig tehutiMetricConfig = null; public Builder setServiceName(String serviceName) { @@ -199,8 +203,18 @@ public Builder setOtelAggregationTemporalitySelector( return this; } - public Builder setOtelHistogramAggregationSelector(DefaultAggregationSelector otelHistogramAggregationSelector) { - this.otelHistogramAggregationSelector = otelHistogramAggregationSelector; + public Builder setUseOtelExponentialHistogram(boolean useOtelExponentialHistogram) { + this.useOtelExponentialHistogram = useOtelExponentialHistogram; + return this; + } + + public Builder setOtelExponentialHistogramMaxScale(int 
otelExponentialHistogramMaxScale) { + this.otelExponentialHistogramMaxScale = otelExponentialHistogramMaxScale; + return this; + } + + public Builder setOtelExponentialHistogramMaxBuckets(int otelExponentialHistogramMaxBuckets) { + this.otelExponentialHistogramMaxBuckets = otelExponentialHistogramMaxBuckets; return this; } @@ -264,27 +278,15 @@ public Builder extractAndSetOtelConfigs(Map configs) { if ((configValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION)) != null) { switch (configValue.toLowerCase(Locale.ROOT)) { case "base2_exponential_bucket_histogram": + setUseOtelExponentialHistogram(true); String maxScaleValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE); + setOtelExponentialHistogramMaxScale(Integer.parseInt(maxScaleValue)); String maxBucketValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS); - if (maxScaleValue != null && maxBucketValue != null) { - int maxScale = Integer.parseInt(maxScaleValue); - int maxBuckets = Integer.parseInt(maxBucketValue); - setOtelHistogramAggregationSelector( - DefaultAggregationSelector.getDefault() - .with( - InstrumentType.HISTOGRAM, - Aggregation.base2ExponentialBucketHistogram(maxBuckets, maxScale))); - } else { - setOtelHistogramAggregationSelector( - DefaultAggregationSelector.getDefault() - .with(InstrumentType.HISTOGRAM, Aggregation.base2ExponentialBucketHistogram())); - } + setOtelExponentialHistogramMaxBuckets(Integer.parseInt(maxBucketValue)); break; case "explicit_bucket_histogram": - setOtelHistogramAggregationSelector( - DefaultAggregationSelector.getDefault() - .with(InstrumentType.HISTOGRAM, Aggregation.explicitBucketHistogram())); + setUseOtelExponentialHistogram(false); break; default: @@ -375,8 +377,16 @@ public AggregationTemporalitySelector getOtelAggregationTemporalitySelector() { return otelAggregationTemporalitySelector; } - public DefaultAggregationSelector getOtelHistogramAggregationSelector() { - 
return otelHistogramAggregationSelector; + public boolean useOtelExponentialHistogram() { + return useOtelExponentialHistogram; + } + + public int getOtelExponentialHistogramMaxScale() { + return otelExponentialHistogramMaxScale; + } + + public int getOtelExponentialHistogramMaxBuckets() { + return otelExponentialHistogramMaxBuckets; } public MetricConfig getTehutiMetricConfig() { @@ -390,7 +400,9 @@ public String toString() { + ", otelExportProtocol='" + otelExportProtocol + '\'' + ", otelEndpoint='" + otelEndpoint + '\'' + ", otelHeaders=" + otelHeaders + ", exportOtelMetricsToLog=" + exportOtelMetricsToLog + ", metricNamingFormat=" + metricNamingFormat + ", otelAggregationTemporalitySelector=" - + otelAggregationTemporalitySelector + ", otelHistogramAggregationSelector=" + otelHistogramAggregationSelector - + ", tehutiMetricConfig=" + tehutiMetricConfig + '}'; + + otelAggregationTemporalitySelector + ", useOtelExponentialHistogram=" + useOtelExponentialHistogram + + ", otelExponentialHistogramMaxScale=" + otelExponentialHistogramMaxScale + + ", otelExponentialHistogramMaxBuckets=" + otelExponentialHistogramMaxBuckets + ", tehutiMetricConfig=" + + tehutiMetricConfig + '}'; } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java index c0878bcef9..b8a3caf2b6 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricNamingFormat.java @@ -33,7 +33,7 @@ public int getValue() { } /** - * validate whether the metric name is a valid {@link VeniceOpenTelemetryMetricNamingFormat#SNAKE_CASE} + * validate whether the input name is defined as a valid {@link VeniceOpenTelemetryMetricNamingFormat#SNAKE_CASE} */ public static 
void validateMetricName(String name) { if (name == null || name.isEmpty()) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 2c77b88bec..2f0846b987 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -4,6 +4,10 @@ import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import com.linkedin.venice.exceptions.VeniceException; +import com.linkedin.venice.stats.metrics.AllMetricEntities; +import com.linkedin.venice.stats.metrics.MetricEntities; +import com.linkedin.venice.stats.metrics.MetricEntity; +import com.linkedin.venice.stats.metrics.MetricType; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.metrics.DoubleHistogram; @@ -16,9 +20,12 @@ import io.opentelemetry.sdk.OpenTelemetrySdk; import io.opentelemetry.sdk.common.CompletableResultCode; import io.opentelemetry.sdk.metrics.Aggregation; +import io.opentelemetry.sdk.metrics.InstrumentSelector; +import io.opentelemetry.sdk.metrics.InstrumentSelectorBuilder; import io.opentelemetry.sdk.metrics.InstrumentType; import io.opentelemetry.sdk.metrics.SdkMeterProvider; import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder; +import io.opentelemetry.sdk.metrics.View; import io.opentelemetry.sdk.metrics.data.AggregationTemporality; import io.opentelemetry.sdk.metrics.data.MetricData; import io.opentelemetry.sdk.metrics.export.MetricExporter; @@ -27,6 +34,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.List; import java.util.Map; import 
org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -59,12 +67,55 @@ MetricExporter getOtlpHttpMetricExporter(VeniceMetricsConfig metricsConfig) { if (metricsConfig.getOtelAggregationTemporalitySelector() != null) { exporterBuilder.setAggregationTemporalitySelector(metricsConfig.getOtelAggregationTemporalitySelector()); } - if (metricsConfig.getOtelHistogramAggregationSelector() != null) { - exporterBuilder.setDefaultAggregationSelector(metricsConfig.getOtelHistogramAggregationSelector()); - } return exporterBuilder.build(); } + /** + * Setting Exponential Histogram aggregation for {@link MetricType#HISTOGRAM} by looping through all + * the metric entities for this service and registering the views with exponential histogram aggregation for + * the Histogram type. + * + * {@link OtlpHttpMetricExporterBuilder#setDefaultAggregationSelector} to enable exponential histogram aggregation + * is not used here to set the aggregation: to not convert the histograms of type + * {@link MetricType#HISTOGRAM_WITHOUT_BUCKETS} to exponential histograms to follow explict boundaries. 
+ */ + private void setExponentialHistogramAggregation(SdkMeterProviderBuilder builder, VeniceMetricsConfig metricsConfig) { + List metricNames = new ArrayList<>(); + + // Loop through this module's metric entities and collect metric names + Class> moduleMetricEntityEnum = AllMetricEntities.getModuleMetricEntityEnum(getMetricPrefix()); + if (moduleMetricEntityEnum == null) { + LOGGER.warn("No metric entities found for module: {}", getMetricPrefix()); + return; + } + Enum[] constants = moduleMetricEntityEnum.getEnumConstants(); + if (constants != null) { + for (Enum constant: constants) { + if (constant instanceof MetricEntities) { + MetricEntities metricEntities = (MetricEntities) constant; + MetricEntity metricEntity = metricEntities.getMetricEntity(); + if (metricEntity.getMetricType() == MetricType.HISTOGRAM) { + metricNames.add(getFullMetricName(getMetricPrefix(), metricEntity.getMetricName())); + } + } + } + } + + // Build an InstrumentSelector with multiple setName calls for all Exponential Histogram metrics + InstrumentSelectorBuilder selectorBuilder = InstrumentSelector.builder().setType(InstrumentType.HISTOGRAM); + metricNames.forEach(selectorBuilder::setName); + + // Register a single view with all metric names included in the InstrumentSelector + builder.registerView( + selectorBuilder.build(), + View.builder() + .setAggregation( + Aggregation.base2ExponentialBucketHistogram( + metricsConfig.getOtelExponentialHistogramMaxBuckets(), + metricsConfig.getOtelExponentialHistogramMaxScale())) + .build()); + } + public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { emitOpenTelemetryMetrics = metricsConfig.emitOtelMetrics(); metricFormat = metricsConfig.getMetricNamingFormat(); @@ -76,8 +127,8 @@ public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { "OpenTelemetry initialization for {} started with config: {}", metricsConfig.getServiceName(), metricsConfig.toString()); - this.metricPrefix = 
transformMetricName("venice." + metricsConfig.getMetricPrefix(), metricFormat); - + this.metricPrefix = "venice." + metricsConfig.getMetricPrefix(); + validateMetricName(this.metricPrefix); try { SdkMeterProviderBuilder builder = SdkMeterProvider.builder(); if (metricsConfig.exportOtelMetricsToEndpoint()) { @@ -89,13 +140,17 @@ public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter(metricsConfig)).build()); } + if (metricsConfig.useOtelExponentialHistogram()) { + setExponentialHistogramAggregation(builder, metricsConfig); + } + builder.setResource(Resource.empty()); sdkMeterProvider = builder.build(); // Register MeterProvider with the OpenTelemetry instance OpenTelemetry openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build(); - this.meter = openTelemetry.getMeter(getMetricPrefix()); + this.meter = openTelemetry.getMeter(transformMetricName(getMetricPrefix(), metricFormat)); LOGGER.info( "OpenTelemetry initialization for {} completed with config: {}", metricsConfig.getServiceName(), @@ -125,9 +180,9 @@ public DoubleHistogram getHistogram(MetricEntity metricEntity) { return histogramMap.computeIfAbsent(metricEntity.getMetricName(), key -> { String fullMetricName = getFullMetricName(getMetricPrefix(), metricEntity.getMetricName()); DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) - .setUnit(metricEntity.getUnit()) + .setUnit(metricEntity.getUnit().name()) .setDescription(metricEntity.getDescription()); - if (metricEntity.getMetricType() == MetricEntity.MetricType.HISTOGRAM_WITHOUT_BUCKETS) { + if (metricEntity.getMetricType() == MetricType.HISTOGRAM_WITHOUT_BUCKETS) { builder.setExplicitBucketBoundariesAdvice(new ArrayList<>()); } return builder.build(); @@ -141,7 +196,7 @@ public LongCounter getCounter(MetricEntity metricEntity) { return counterMap.computeIfAbsent(metricEntity.getMetricName(), key -> { 
String fullMetricName = getFullMetricName(getMetricPrefix(), metricEntity.getMetricName()); LongCounterBuilder builder = meter.counterBuilder(fullMetricName) - .setUnit(metricEntity.getUnit()) + .setUnit(metricEntity.getUnit().name()) .setDescription(metricEntity.getDescription()); return builder.build(); }); @@ -180,11 +235,6 @@ public AggregationTemporality getAggregationTemporality(InstrumentType instrumen return metricsConfig.getOtelAggregationTemporalitySelector().getAggregationTemporality(instrumentType); } - @Override - public Aggregation getDefaultAggregation(InstrumentType instrumentType) { - return metricsConfig.getOtelHistogramAggregationSelector().getDefaultAggregation(instrumentType); - } - @Override public CompletableResultCode export(Collection metrics) { LOGGER.info("Logging OpenTelemetry metrics for debug purpose: {}", Arrays.toString(metrics.toArray())); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java new file mode 100644 index 0000000000..016d2ef1eb --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java @@ -0,0 +1,20 @@ +package com.linkedin.venice.stats.metrics; + +import com.linkedin.venice.stats.metrics.modules.RouterMetricEntities; +import java.util.HashMap; +import java.util.Map; + + +public class AllMetricEntities { + private static final Map>> allModuleMetricEntitiesEnums = new HashMap<>(); + + // Add all the components metric enum classes + static { + allModuleMetricEntitiesEnums.put("venice.router", RouterMetricEntities.class); + } + + // Method to retrieve an enum class by key + public static Class> getModuleMetricEntityEnum(String key) { + return allModuleMetricEntitiesEnums.get(key); + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java new file mode 100644 index 0000000000..edaeb5ca55 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java @@ -0,0 +1,9 @@ +package com.linkedin.venice.stats.metrics; + +/** + * Interface to get {@link MetricEntity} + * All modules metric enum class should implement this interface. + */ +public interface MetricEntities { + MetricEntity getMetricEntity(); +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java similarity index 86% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java index b3655bf8f8..4419d41c7f 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/MetricEntity.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java @@ -1,4 +1,4 @@ -package com.linkedin.venice.stats; +package com.linkedin.venice.stats.metrics; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; import java.util.Set; @@ -11,20 +11,16 @@ * Metric entity class to define a metric with all its properties */ public class MetricEntity { - public enum MetricType { - HISTOGRAM, HISTOGRAM_WITHOUT_BUCKETS, COUNTER - }; - private final String metricName; private final MetricType metricType; - private final String unit; + private final MetricUnit unit; private final String description; private final Set dimensionsList; public MetricEntity( @Nonnull String metricName, @Nonnull MetricType metricType, - @Nonnull String unit, + @Nonnull MetricUnit unit, @Nonnull String description) { this(metricName, metricType, unit, description, null); } @@ -32,7 +28,7 
@@ public MetricEntity( public MetricEntity( @Nonnull String metricName, @Nonnull MetricType metricType, - @Nonnull String unit, + @Nonnull MetricUnit unit, @Nonnull String description, @Nullable Set dimensionsList) { Validate.notEmpty(metricName, "Metric name cannot be null or empty"); @@ -54,7 +50,7 @@ public MetricType getMetricType() { } @Nonnull - public String getUnit() { + public MetricUnit getUnit() { return unit; } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java new file mode 100644 index 0000000000..96ce0344cf --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java @@ -0,0 +1,27 @@ +package com.linkedin.venice.stats.metrics; + +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; + + +/** + * Metric type enum to define the type of metrics Venice supports via OpenTelemetry + */ +public enum MetricType { + /** + * For Histogram with percentiles: can be configured to be exponential or explicit bucket + * check {@link VeniceMetricsConfig.Builder#extractAndSetOtelConfigs} for more details + */ + HISTOGRAM, + /** + * For Histogram without percentiles: Explicit bucket histogram. + * Provides multiple aggregations like min, max, count and sum without the memory overhead of percentiles. + * check {@link VeniceOpenTelemetryMetricsRepository#getHistogram} and + * {@link VeniceOpenTelemetryMetricsRepository#setExponentialHistogramAggregation} for more details + */ + HISTOGRAM_WITHOUT_BUCKETS, + /** + * For Counter: A simple counter that can be added to. 
+ */ + COUNTER; +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java new file mode 100644 index 0000000000..3cb823d623 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java @@ -0,0 +1,8 @@ +package com.linkedin.venice.stats.metrics; + +/** + * Metric Unit enum to define list of Units supported for metrics + */ +public enum MetricUnit { + NUMBER, MILLISECONDS +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java new file mode 100644 index 0000000000..e340651474 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java @@ -0,0 +1,95 @@ +package com.linkedin.venice.stats.metrics.modules; + +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; +import static 
com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; + +import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import com.linkedin.venice.stats.metrics.MetricEntities; +import com.linkedin.venice.stats.metrics.MetricEntity; +import com.linkedin.venice.stats.metrics.MetricType; +import com.linkedin.venice.stats.metrics.MetricUnit; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + + +/** + * List all Metric entities for router + */ +public enum RouterMetricEntities implements MetricEntities { + INCOMING_CALL_COUNT( + "incoming_call_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of all incoming requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) + ), + CALL_COUNT( + "call_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of all requests with response details", + setOf( + VENICE_STORE_NAME, + VENICE_CLUSTER_NAME, + VENICE_REQUEST_METHOD, + HTTP_RESPONSE_STATUS_CODE, + HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VENICE_RESPONSE_STATUS_CODE_CATEGORY) + ), + CALL_TIME( + "call_time", MetricType.HISTOGRAM, MetricUnit.MILLISECONDS, "Latency based on all responses", + setOf( + VENICE_STORE_NAME, + VENICE_CLUSTER_NAME, + VENICE_REQUEST_METHOD, + HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VENICE_RESPONSE_STATUS_CODE_CATEGORY) + ), + CALL_KEY_COUNT( + "call_key_count", MetricType.HISTOGRAM_WITHOUT_BUCKETS, MetricUnit.NUMBER, "Count of keys in multi key requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_VALIDATION_OUTCOME) + ), + RETRY_COUNT( + "retry_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of retries triggered", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_TYPE) + ), + ALLOWED_RETRY_COUNT( + "allowed_retry_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of allowed retry requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, 
VENICE_REQUEST_METHOD) + ), + DISALLOWED_RETRY_COUNT( + "disallowed_retry_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of disallowed retry requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) + ), + RETRY_DELAY( + "retry_delay", MetricType.HISTOGRAM_WITHOUT_BUCKETS, MetricUnit.MILLISECONDS, "Retry delay time", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) + ), + ABORTED_RETRY_COUNT( + "aborted_retry_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of aborted retry requests", + setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_ABORT_REASON) + ); + + private final MetricEntity metricEntity; + + RouterMetricEntities( + String metricName, + MetricType metricType, + MetricUnit unit, + String description, + Set dimensionsList) { + this.metricEntity = new MetricEntity(metricName, metricType, unit, description, dimensionsList); + } + + @Override + public MetricEntity getMetricEntity() { + return metricEntity; + } + + public static Set setOf(T... 
objs) { + return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(objs))); + } +} diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java index 021bf78301..c7af203f26 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -17,7 +17,6 @@ import com.linkedin.venice.stats.VeniceMetricsConfig.Builder; import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; -import io.opentelemetry.sdk.metrics.InstrumentType; import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; import io.tehuti.metrics.MetricConfig; import java.util.HashMap; @@ -44,7 +43,9 @@ public void testDefaultValuesWithBasicConfig() { assertFalse(config.exportOtelMetricsToLog()); assertEquals(config.getMetricNamingFormat(), VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE); assertEquals(config.getOtelAggregationTemporalitySelector(), AggregationTemporalitySelector.deltaPreferred()); - assertEquals(config.getOtelHistogramAggregationSelector(), null); + assertEquals(config.useOtelExponentialHistogram(), true); + assertEquals(config.getOtelExponentialHistogramMaxScale(), 3); + assertEquals(config.getOtelExponentialHistogramMaxBuckets(), 250); assertNotNull(config.getTehutiMetricConfig()); } @@ -153,26 +154,6 @@ public void testSetAggregationTemporalitySelectorInvalidConfig() { assertEquals(config.getOtelAggregationTemporalitySelector(), AggregationTemporalitySelector.deltaPreferred()); } - @Test - public void testSetHistogramAggregationSelector() { - Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); - otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, 
OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); - otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); - otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "base2_exponential_bucket_histogram"); - otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE, "10"); - otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS, "50"); - - VeniceMetricsConfig config = new Builder().setServiceName("TestService") - .setMetricPrefix("TestPrefix") - .extractAndSetOtelConfigs(otelConfigs) - .build(); - assertEquals( - config.getOtelHistogramAggregationSelector().getDefaultAggregation(InstrumentType.HISTOGRAM).toString(), - "Base2ExponentialHistogramAggregation{maxBuckets=50,maxScale=10}"); - } - @Test(expectedExceptions = IllegalArgumentException.class) public void testSetHistogramAggregationSelectorInvalidConfig() { Map otelConfigs = new HashMap<>(); diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java index 5e2fdf1301..76d34fb7e7 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -8,6 +8,9 @@ import static org.testng.Assert.assertNull; import static org.testng.Assert.assertSame; +import com.linkedin.venice.stats.metrics.MetricEntity; +import com.linkedin.venice.stats.metrics.MetricType; +import com.linkedin.venice.stats.metrics.MetricUnit; import io.opentelemetry.api.metrics.DoubleHistogram; import io.opentelemetry.api.metrics.LongCounter; import io.opentelemetry.sdk.metrics.export.MetricExporter; @@ -59,9 +62,9 @@ public void testConstructorWithEmitDisabled() { 
assertNull(metricsRepository.getSdkMeterProvider()); assertNull(metricsRepository.getMeter()); assertNull( - metricsRepository.getInstrument(new MetricEntity("test", MetricEntity.MetricType.HISTOGRAM, "unit", "desc"))); + metricsRepository.getInstrument(new MetricEntity("test", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc"))); assertNull( - metricsRepository.getInstrument(new MetricEntity("test", MetricEntity.MetricType.COUNTER, "unit", "desc"))); + metricsRepository.getInstrument(new MetricEntity("test", MetricType.COUNTER, MetricUnit.NUMBER, "desc"))); } @Test @@ -107,9 +110,9 @@ public void testTransformMetricName() { @Test public void testCreateTwoHistograms() { DoubleHistogram histogram1 = (DoubleHistogram) metricsRepository - .getInstrument(new MetricEntity("test_histogram", MetricEntity.MetricType.HISTOGRAM, "unit", "desc")); + .getInstrument(new MetricEntity("test_histogram", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc")); DoubleHistogram histogram2 = (DoubleHistogram) metricsRepository - .getInstrument(new MetricEntity("test_histogram", MetricEntity.MetricType.HISTOGRAM, "unit", "desc")); + .getInstrument(new MetricEntity("test_histogram", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc")); assertNotNull(histogram1); assertSame(histogram1, histogram2, "Should return the same instance for the same histogram name."); @@ -118,9 +121,9 @@ public void testCreateTwoHistograms() { @Test public void testCreateTwoCounters() { LongCounter counter1 = (LongCounter) metricsRepository - .getInstrument(new MetricEntity("test_counter", MetricEntity.MetricType.COUNTER, "unit", "desc")); + .getInstrument(new MetricEntity("test_counter", MetricType.COUNTER, MetricUnit.NUMBER, "desc")); LongCounter counter2 = (LongCounter) metricsRepository - .getInstrument(new MetricEntity("test_counter", MetricEntity.MetricType.COUNTER, "unit", "desc")); + .getInstrument(new MetricEntity("test_counter", MetricType.COUNTER, MetricUnit.NUMBER, "desc")); assertNotNull(counter1); 
assertSame(counter1, counter2, "Should return the same instance for the same counter name."); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 347ff39b18..5e3aa84744 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -2,15 +2,6 @@ import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_METRIC_PREFIX; import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_NAME; -import static com.linkedin.venice.router.stats.RouterMetricEntities.ABORTED_RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.ALLOWED_RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_KEY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_TIME; -import static com.linkedin.venice.router.stats.RouterMetricEntities.DISALLOWED_RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.INCOMING_CALL_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_DELAY; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; @@ -22,6 +13,15 @@ import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; import static 
com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.ABORTED_RETRY_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.ALLOWED_RETRY_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.CALL_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.CALL_KEY_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.CALL_TIME; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.DISALLOWED_RETRY_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.INCOMING_CALL_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.RETRY_COUNT; +import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.RETRY_DELAY; import com.linkedin.alpini.router.monitoring.ScatterGatherStats; import com.linkedin.venice.common.VeniceSystemStoreUtils; @@ -168,8 +168,8 @@ public RouterHttpRequestStats( Rate tardyRequestRate = new OccurrenceRate(); incomingRequestSensor = registerSensor("request", new Count(), requestRate); - incomingRequestSensorOtel = - (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(INCOMING_CALL_COUNT); + incomingRequestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(INCOMING_CALL_COUNT.getMetricEntity()); healthySensor = registerSensor("healthy_request", new Count(), healthyRequestRate); unhealthySensor = registerSensor("unhealthy_request", new Count()); @@ -180,29 +180,30 @@ public RouterHttpRequestStats( tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); badRequestSensor = 
registerSensor("bad_request", new Count()); - requestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_COUNT); + requestSensorOtel = + (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_COUNT.getMetricEntity()); errorRetryCountSensor = registerSensor("error_retry", new Count()); - retryTriggeredSensorOtel = - (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(RETRY_COUNT); + retryTriggeredSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(RETRY_COUNT.getMetricEntity()); allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); - allowedRetryRequestSensorOtel = - (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(ALLOWED_RETRY_COUNT); + allowedRetryRequestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(ALLOWED_RETRY_COUNT.getMetricEntity()); disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); - disallowedRetryRequestSensorOtel = - (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(DISALLOWED_RETRY_COUNT); + disallowedRetryRequestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(DISALLOWED_RETRY_COUNT.getMetricEntity()); errorRetryAttemptTriggeredByPendingRequestCheckSensor = registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); - retryDelaySensorOtel = - (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(RETRY_DELAY); + retryDelaySensorOtel = (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(RETRY_DELAY.getMetricEntity()); delayConstraintAbortedRetryRequest = 
registerSensor("delay_constraint_aborted_retry_request", new Count()); slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); - abortedRetrySensorOtel = - (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(ABORTED_RETRY_COUNT); + abortedRetrySensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(ABORTED_RETRY_COUNT.getMetricEntity()); unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); requestThrottledByRouterCapacitySensor = registerSensor("request_throttled_by_router_capacity", new Count()); @@ -214,8 +215,8 @@ public RouterHttpRequestStats( unhealthyRequestLatencySensor = registerSensor("unhealthy_request_latency", new Avg(), new Max(0)); tardyRequestLatencySensor = registerSensor("tardy_request_latency", new Avg(), new Max(0)); throttledRequestLatencySensor = registerSensor("throttled_request_latency", new Avg(), new Max(0)); - latencySensorOtel = - (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_TIME); + latencySensorOtel = (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(CALL_TIME.getMetricEntity()); routerResponseWaitingTimeSensor = registerSensor( "response_waiting_time", @@ -252,8 +253,8 @@ public RouterHttpRequestStats( keyNumSensor = registerSensor("key_num", new Avg(), new Max(0)); badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); - keyCountSensorOtel = - (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_KEY_COUNT); + keyCountSensorOtel = (DoubleHistogram) 
metricsRepository.getOpenTelemetryMetricsRepository() + .getInstrument(CALL_KEY_COUNT.getMetricEntity()); /** * request_usage.Total is incoming KPS while request_usage.OccurrenceRate is QPS @@ -556,12 +557,16 @@ public void recordResponse() { public void recordAllowedRetryRequest() { allowedRetryRequestSensor.record(); - allowedRetryRequestSensorOtel.add(1, commonMetricDimensions); + if (emitOpenTelemetryMetrics) { + allowedRetryRequestSensorOtel.add(1, commonMetricDimensions); + } } public void recordDisallowedRetryRequest() { disallowedRetryRequestSensor.record(); - disallowedRetryRequestSensorOtel.add(1, commonMetricDimensions); + if (emitOpenTelemetryMetrics) { + disallowedRetryRequestSensorOtel.add(1, commonMetricDimensions); + } } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java deleted file mode 100644 index 05e88e9714..0000000000 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java +++ /dev/null @@ -1,95 +0,0 @@ -package com.linkedin.venice.router.stats; - -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; -import 
static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; -import static com.linkedin.venice.utils.Utils.setOf; - -import com.linkedin.venice.stats.MetricEntity; -import java.util.concurrent.TimeUnit; - - -/** - * List all Metric entities for router - */ -public class RouterMetricEntities { - public static final MetricEntity INCOMING_CALL_COUNT = new MetricEntity( - "incoming_call_count", - MetricEntity.MetricType.COUNTER, - "Number", - "Count of all incoming requests", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); - - public static final MetricEntity CALL_COUNT = new MetricEntity( - "call_count", - MetricEntity.MetricType.COUNTER, - "Number", - "Count of all requests with response details", - setOf( - VENICE_STORE_NAME, - VENICE_CLUSTER_NAME, - VENICE_REQUEST_METHOD, - HTTP_RESPONSE_STATUS_CODE, - HTTP_RESPONSE_STATUS_CODE_CATEGORY, - VENICE_RESPONSE_STATUS_CODE_CATEGORY)); - - public static final MetricEntity CALL_TIME = new MetricEntity( - "call_time", - MetricEntity.MetricType.HISTOGRAM, - TimeUnit.MILLISECONDS.name(), - "Latency based on all responses", - setOf( - VENICE_STORE_NAME, - VENICE_CLUSTER_NAME, - VENICE_REQUEST_METHOD, - HTTP_RESPONSE_STATUS_CODE_CATEGORY, - VENICE_RESPONSE_STATUS_CODE_CATEGORY)); - - public static final MetricEntity CALL_KEY_COUNT = new MetricEntity( - "call_key_count", - MetricEntity.MetricType.HISTOGRAM_WITHOUT_BUCKETS, - "Number", - "Count of keys in multi key requests", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_VALIDATION_OUTCOME)); - - public static final MetricEntity RETRY_COUNT = new MetricEntity( - "retry_count", - MetricEntity.MetricType.COUNTER, - "Number", - "Count of retries triggered", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_TYPE)); - - public static final 
MetricEntity ALLOWED_RETRY_COUNT = new MetricEntity( - "allowed_retry_count", - MetricEntity.MetricType.COUNTER, - "Number", - "Count of allowed retry requests", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); - - public static final MetricEntity DISALLOWED_RETRY_COUNT = new MetricEntity( - "disallowed_retry_count", - MetricEntity.MetricType.COUNTER, - "Number", - "Count of disallowed retry requests", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); - - public static final MetricEntity RETRY_DELAY = new MetricEntity( - "retry_delay", - MetricEntity.MetricType.HISTOGRAM_WITHOUT_BUCKETS, - TimeUnit.MILLISECONDS.name(), - "Retry delay time", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD)); - - public static final MetricEntity ABORTED_RETRY_COUNT = new MetricEntity( - "aborted_retry_count", - MetricEntity.MetricType.COUNTER, - "Number", - "Count of aborted retry requests", - setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_RETRY_ABORT_REASON)); -} From 6661c1aa4914db6184e171377ff8517ce084ebb7 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Thu, 21 Nov 2024 03:23:31 -0800 Subject: [PATCH 07/19] added otel metrics inside MetricEntity and record from there --- .../VeniceOpenTelemetryMetricsRepository.java | 14 ++-- .../venice/stats/metrics/MetricEntity.java | 48 ++++++++++++++ .../venice/stats/metrics/MetricType.java | 2 +- .../metrics/modules/RouterMetricEntities.java | 1 + ...iceOpenTelemetryMetricsRepositoryTest.java | 12 ++-- .../router/stats/RouterHttpRequestStats.java | 66 ++++++------------- 6 files changed, 85 insertions(+), 58 deletions(-) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 2f0846b987..9f395ad1fe 100644 --- 
a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -173,7 +173,7 @@ private String getMetricPrefix() { return metricPrefix; } - public DoubleHistogram getHistogram(MetricEntity metricEntity) { + public DoubleHistogram createHistogram(MetricEntity metricEntity) { if (!emitOpenTelemetryMetrics) { return null; } @@ -189,7 +189,7 @@ public DoubleHistogram getHistogram(MetricEntity metricEntity) { }); } - public LongCounter getCounter(MetricEntity metricEntity) { + public LongCounter createCounter(MetricEntity metricEntity) { if (!emitOpenTelemetryMetrics) { return null; } @@ -202,17 +202,21 @@ public LongCounter getCounter(MetricEntity metricEntity) { }); } - public Object getInstrument(MetricEntity metricEntity) { + public Object createInstrument(MetricEntity metricEntity) { switch (metricEntity.getMetricType()) { case HISTOGRAM: case HISTOGRAM_WITHOUT_BUCKETS: - return getHistogram(metricEntity); + metricEntity.setOtelMetric(createHistogram(metricEntity)); + break; case COUNTER: - return getCounter(metricEntity); + metricEntity.setOtelMetric(createCounter(metricEntity)); + break; + default: throw new VeniceException("Unknown metric type: " + metricEntity.getMetricType()); } + return metricEntity.getOtelMetric(); } public void close() { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java index 4419d41c7f..cab90074bf 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java @@ -1,6 +1,10 @@ package com.linkedin.venice.stats.metrics; +import 
com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -16,6 +20,7 @@ public class MetricEntity { private final MetricUnit unit; private final String description; private final Set dimensionsList; + private Object otelMetric = null; public MetricEntity( @Nonnull String metricName, @@ -39,6 +44,14 @@ public MetricEntity( this.dimensionsList = dimensionsList; } + public void setOtelMetric(Object otelMetric) { + this.otelMetric = otelMetric; + } + + public Object getOtelMetric() { + return otelMetric; + } + @Nonnull public String getMetricName() { return metricName; @@ -63,4 +76,39 @@ public String getDescription() { public Set getDimensionsList() { return dimensionsList; } + + /** + * create the metric + */ + public void createMetric(VeniceOpenTelemetryMetricsRepository otelRepository) { + otelRepository.createInstrument(this); + } + + /** + * Record otel metrics + */ + private void recordOtelMetric(double value, Attributes otelDimensions) { + if (otelMetric != null) { + switch (metricType) { + case HISTOGRAM: + case HISTOGRAM_WITHOUT_BUCKETS: + ((DoubleHistogram) otelMetric).record(value, otelDimensions); + break; + case COUNTER: + ((LongCounter) otelMetric).add((long) value, otelDimensions); + break; + + default: + throw new IllegalArgumentException("Unsupported metric type: " + metricType); + } + } + } + + public void record(long value, Attributes otelDimensions) { + recordOtelMetric(value, otelDimensions); + } + + public void record(double value, Attributes otelDimensions) { + recordOtelMetric(value, otelDimensions); + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java index 96ce0344cf..077b518f33 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java @@ -16,7 +16,7 @@ public enum MetricType { /** * For Histogram without percentiles: Explicit bucket histogram. * Provides multiple aggregations like min, max, count and sum without the memory overhead of percentiles. - * check {@link VeniceOpenTelemetryMetricsRepository#getHistogram} and + * check {@link VeniceOpenTelemetryMetricsRepository#createHistogram} and * {@link VeniceOpenTelemetryMetricsRepository#setExponentialHistogramAggregation} for more details */ HISTOGRAM_WITHOUT_BUCKETS, diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java index e340651474..7b82a2505a 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java @@ -89,6 +89,7 @@ public MetricEntity getMetricEntity() { return metricEntity; } + @SafeVarargs public static Set setOf(T... 
objs) { return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(objs))); } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java index 76d34fb7e7..89456e3492 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -62,9 +62,9 @@ public void testConstructorWithEmitDisabled() { assertNull(metricsRepository.getSdkMeterProvider()); assertNull(metricsRepository.getMeter()); assertNull( - metricsRepository.getInstrument(new MetricEntity("test", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc"))); + metricsRepository.createInstrument(new MetricEntity("test", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc"))); assertNull( - metricsRepository.getInstrument(new MetricEntity("test", MetricType.COUNTER, MetricUnit.NUMBER, "desc"))); + metricsRepository.createInstrument(new MetricEntity("test", MetricType.COUNTER, MetricUnit.NUMBER, "desc"))); } @Test @@ -110,9 +110,9 @@ public void testTransformMetricName() { @Test public void testCreateTwoHistograms() { DoubleHistogram histogram1 = (DoubleHistogram) metricsRepository - .getInstrument(new MetricEntity("test_histogram", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc")); + .createInstrument(new MetricEntity("test_histogram", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc")); DoubleHistogram histogram2 = (DoubleHistogram) metricsRepository - .getInstrument(new MetricEntity("test_histogram", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc")); + .createInstrument(new MetricEntity("test_histogram", MetricType.HISTOGRAM, MetricUnit.NUMBER, "desc")); assertNotNull(histogram1); assertSame(histogram1, histogram2, "Should return the same instance 
for the same histogram name."); @@ -121,9 +121,9 @@ public void testCreateTwoHistograms() { @Test public void testCreateTwoCounters() { LongCounter counter1 = (LongCounter) metricsRepository - .getInstrument(new MetricEntity("test_counter", MetricType.COUNTER, MetricUnit.NUMBER, "desc")); + .createInstrument(new MetricEntity("test_counter", MetricType.COUNTER, MetricUnit.NUMBER, "desc")); LongCounter counter2 = (LongCounter) metricsRepository - .getInstrument(new MetricEntity("test_counter", MetricType.COUNTER, MetricUnit.NUMBER, "desc")); + .createInstrument(new MetricEntity("test_counter", MetricType.COUNTER, MetricUnit.NUMBER, "desc")); assertNotNull(counter1); assertSame(counter1, counter2, "Should return the same instance for the same counter name."); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 5e3aa84744..499fdba229 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -32,6 +32,7 @@ import com.linkedin.venice.stats.VeniceMetricsConfig; import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; import com.linkedin.venice.stats.dimensions.VeniceRequestRetryAbortReason; import com.linkedin.venice.stats.dimensions.VeniceRequestRetryType; @@ -39,8 +40,6 @@ import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; import io.netty.handler.codec.http.HttpResponseStatus; import io.opentelemetry.api.common.Attributes; -import io.opentelemetry.api.metrics.DoubleHistogram; -import 
io.opentelemetry.api.metrics.LongCounter; import io.tehuti.Metric; import io.tehuti.metrics.MeasurableStat; import io.tehuti.metrics.MetricConfig; @@ -72,7 +71,6 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { /** metrics to track incoming requests */ private final Sensor incomingRequestSensor; - private final LongCounter incomingRequestSensorOtel; /** metrics to track response handling */ private final Sensor healthySensor; @@ -82,7 +80,6 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private final Sensor tardyRequestRatioSensor; private final Sensor throttleSensor; private final Sensor badRequestSensor; - private final LongCounter requestSensorOtel; /** latency metrics */ private final Sensor latencySensor; @@ -90,29 +87,22 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private final Sensor unhealthyRequestLatencySensor; private final Sensor tardyRequestLatencySensor; private final Sensor throttledRequestLatencySensor; - private final DoubleHistogram latencySensorOtel; /** retry metrics */ private final Sensor errorRetryCountSensor; - private final LongCounter retryTriggeredSensorOtel; private final Sensor allowedRetryRequestSensor; - private final LongCounter allowedRetryRequestSensorOtel; private final Sensor disallowedRetryRequestSensor; - private final LongCounter disallowedRetryRequestSensorOtel; private final Sensor retryDelaySensor; - private final DoubleHistogram retryDelaySensorOtel; /** retry aborted metrics */ private final Sensor delayConstraintAbortedRetryRequest; private final Sensor slowRouteAbortedRetryRequest; private final Sensor retryRouteLimitAbortedRetryRequest; private final Sensor noAvailableReplicaAbortedRetryRequest; - private final LongCounter abortedRetrySensorOtel; /** key count metrics */ private final Sensor keyNumSensor; private final Sensor badRequestKeyCountSensor; - private final DoubleHistogram keyCountSensorOtel; /** OTel metrics yet to be added */ 
private final Sensor requestSizeSensor; @@ -156,6 +146,7 @@ public RouterHttpRequestStats( super(metricsRepository, storeName, requestType); emitOpenTelemetryMetrics = metricsRepository.getVeniceMetricsConfig().emitOtelMetrics(); openTelemetryMetricFormat = metricsRepository.getVeniceMetricsConfig().getMetricNamingFormat(); + VeniceOpenTelemetryMetricsRepository otelRepository = metricsRepository.getOpenTelemetryMetricsRepository(); commonMetricDimensions = Attributes.builder() .put(getDimensionName(VENICE_STORE_NAME), storeName) .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.name().toLowerCase()) @@ -168,8 +159,7 @@ public RouterHttpRequestStats( Rate tardyRequestRate = new OccurrenceRate(); incomingRequestSensor = registerSensor("request", new Count(), requestRate); - incomingRequestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(INCOMING_CALL_COUNT.getMetricEntity()); + INCOMING_CALL_COUNT.getMetricEntity().createMetric(otelRepository); healthySensor = registerSensor("healthy_request", new Count(), healthyRequestRate); unhealthySensor = registerSensor("unhealthy_request", new Count()); @@ -180,30 +170,24 @@ public RouterHttpRequestStats( tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); badRequestSensor = registerSensor("bad_request", new Count()); - requestSensorOtel = - (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository().getInstrument(CALL_COUNT.getMetricEntity()); + CALL_COUNT.getMetricEntity().createMetric(otelRepository); errorRetryCountSensor = registerSensor("error_retry", new Count()); - retryTriggeredSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(RETRY_COUNT.getMetricEntity()); + RETRY_COUNT.getMetricEntity().createMetric(otelRepository); allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); - 
allowedRetryRequestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(ALLOWED_RETRY_COUNT.getMetricEntity()); + ALLOWED_RETRY_COUNT.getMetricEntity().createMetric(otelRepository); disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); - disallowedRetryRequestSensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(DISALLOWED_RETRY_COUNT.getMetricEntity()); + DISALLOWED_RETRY_COUNT.getMetricEntity().createMetric(otelRepository); errorRetryAttemptTriggeredByPendingRequestCheckSensor = registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); - retryDelaySensorOtel = (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(RETRY_DELAY.getMetricEntity()); + RETRY_DELAY.getMetricEntity().createMetric(otelRepository); delayConstraintAbortedRetryRequest = registerSensor("delay_constraint_aborted_retry_request", new Count()); slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); - abortedRetrySensorOtel = (LongCounter) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(ABORTED_RETRY_COUNT.getMetricEntity()); + ABORTED_RETRY_COUNT.getMetricEntity().createMetric(otelRepository); unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); requestThrottledByRouterCapacitySensor = registerSensor("request_throttled_by_router_capacity", new Count()); @@ -215,8 +199,7 @@ public RouterHttpRequestStats( unhealthyRequestLatencySensor = registerSensor("unhealthy_request_latency", 
new Avg(), new Max(0)); tardyRequestLatencySensor = registerSensor("tardy_request_latency", new Avg(), new Max(0)); throttledRequestLatencySensor = registerSensor("throttled_request_latency", new Avg(), new Max(0)); - latencySensorOtel = (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(CALL_TIME.getMetricEntity()); + CALL_TIME.getMetricEntity().createMetric(otelRepository); routerResponseWaitingTimeSensor = registerSensor( "response_waiting_time", @@ -253,8 +236,7 @@ public RouterHttpRequestStats( keyNumSensor = registerSensor("key_num", new Avg(), new Max(0)); badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); - keyCountSensorOtel = (DoubleHistogram) metricsRepository.getOpenTelemetryMetricsRepository() - .getInstrument(CALL_KEY_COUNT.getMetricEntity()); + CALL_KEY_COUNT.getMetricEntity().createMetric(otelRepository); /** * request_usage.Total is incoming KPS while request_usage.OccurrenceRate is QPS @@ -308,9 +290,7 @@ public void recordIncomingRequest() { incomingRequestSensor.record(); inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet()); totalInflightRequestSensor.record(); - if (emitOpenTelemetryMetrics) { - incomingRequestSensorOtel.add(1, commonMetricDimensions); - } + INCOMING_CALL_COUNT.getMetricEntity().record(1, commonMetricDimensions); } public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) { @@ -381,7 +361,7 @@ public void recordRetryTriggeredSensorOtel(VeniceRequestRetryType retryType) { .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) .build(); - retryTriggeredSensorOtel.add(1, dimensions); + RETRY_COUNT.getMetricEntity().record(1, dimensions); } } @@ -391,7 +371,7 @@ public void recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason abortReas .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), 
abortReason.getAbortReason()) .build(); - abortedRetrySensorOtel.add(1, dimensions); + ABORTED_RETRY_COUNT.getMetricEntity().record(1, dimensions); } } @@ -434,7 +414,7 @@ public void recordLatencySensorOtel( getVeniceHttpResponseStatusCodeCategory(responseStatus)) .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .build(); - latencySensorOtel.record(latency, dimensions); + CALL_TIME.getMetricEntity().record(latency, dimensions); } } @@ -450,7 +430,7 @@ public void recordRequestSensorOtel( .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) .build(); - requestSensorOtel.add(1, dimensions); + CALL_COUNT.getMetricEntity().record(1, dimensions); } } @@ -496,7 +476,7 @@ public void recordKeyCountSensorOtel(int keyNum, VeniceRequestValidationOutcome .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome()) .build(); - keyCountSensorOtel.record(keyNum, dimensions); + CALL_KEY_COUNT.getMetricEntity().record(keyNum, dimensions); } } @@ -557,16 +537,12 @@ public void recordResponse() { public void recordAllowedRetryRequest() { allowedRetryRequestSensor.record(); - if (emitOpenTelemetryMetrics) { - allowedRetryRequestSensorOtel.add(1, commonMetricDimensions); - } + ALLOWED_RETRY_COUNT.getMetricEntity().record(1, commonMetricDimensions); } public void recordDisallowedRetryRequest() { disallowedRetryRequestSensor.record(); - if (emitOpenTelemetryMetrics) { - disallowedRetryRequestSensorOtel.add(1, commonMetricDimensions); - } + DISALLOWED_RETRY_COUNT.getMetricEntity().record(1, commonMetricDimensions); } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { @@ -575,9 +551,7 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { public void recordRetryDelay(double delay) { 
retryDelaySensor.record(delay); - if (emitOpenTelemetryMetrics) { - retryDelaySensorOtel.record(delay, commonMetricDimensions); - } + RETRY_DELAY.getMetricEntity().record(delay, commonMetricDimensions); } public void recordMetaStoreShadowRead() { From d28d3abc86294ec30324b9b89f64ba4f062fe39a Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Fri, 22 Nov 2024 00:32:45 -0800 Subject: [PATCH 08/19] pass MetricsRepository around instead of VeniceMetricsRepository to not break compat --- .../stats/AggHostLevelIngestionStats.java | 4 +- .../stats/AggKafkaConsumerServiceStats.java | 3 +- .../venice/stats/AbstractVeniceAggStats.java | 40 ++++++------------- .../venice/stats/metrics/MetricEntity.java | 4 +- .../utils/metrics/MetricsRepositoryUtils.java | 33 +++++++-------- .../pushmonitor/AggPushHealthStats.java | 3 +- .../AggPushStatusCleanUpStats.java | 3 +- .../stats/AbstractVeniceAggStoreStats.java | 19 +++------ .../linkedin/venice/endToEnd/TestBatch.java | 1 + .../stats/AggPartitionHealthStats.java | 4 +- .../linkedin/venice/router/RouterServer.java | 14 +++---- .../venice/router/api/VeniceDispatcher.java | 4 +- .../venice/router/api/VenicePathParser.java | 6 +-- .../router/api/VeniceVersionFinder.java | 6 +-- .../api/routing/helix/HelixGroupSelector.java | 4 +- .../ApacheHttpAsyncStorageNodeClient.java | 4 +- .../router/stats/AdminOperationsStats.java | 4 +- .../router/stats/AggHostHealthStats.java | 11 +++-- .../stats/AggRouterHttpRequestStats.java | 12 +++--- .../venice/router/stats/HealthCheckStats.java | 4 +- .../venice/router/stats/HelixGroupStats.java | 4 +- .../venice/router/stats/HostHealthStats.java | 4 +- .../router/stats/RouteHttpRequestStats.java | 8 ++-- .../venice/router/stats/RouteHttpStats.java | 8 ++-- .../stats/RouterCurrentVersionStats.java | 4 +- .../router/stats/RouterHttpRequestStats.java | 32 ++++++++++----- .../router/stats/RouterThrottleStats.java | 4 +- .../venice/router/stats/SecurityStats.java | 4 +- 
.../router/stats/StaleVersionStats.java | 4 +- .../router/AggRouterHttpRequestStatsTest.java | 4 +- .../router/RouteHttpRequestStatsTest.java | 4 +- .../router/api/TestVeniceDelegateMode.java | 4 +- .../router/api/TestVenicePathParser.java | 2 +- .../api/path/TestVeniceMultiGetPath.java | 2 +- .../stats/AdminOperationsStatsTest.java | 4 +- .../listener/HttpChannelInitializer.java | 5 ++- .../linkedin/venice/server/VeniceServer.java | 3 +- .../venice/stats/AggRocksDBStats.java | 8 +++- .../stats/AggServerHttpRequestStats.java | 5 ++- .../stats/AggServerQuotaUsageStats.java | 8 +++- .../stats/AggServerHttpRequestStatsTest.java | 2 + .../AggServerReadQuotaUsageStatsTest.java | 2 +- 42 files changed, 161 insertions(+), 147 deletions(-) diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java index a635f11809..3752e8c8e8 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java @@ -23,10 +23,12 @@ public AggHostLevelIngestionStats( boolean unregisterMetricForDeletedStoreEnabled, Time time) { super( + serverConfig.getClusterName(), metricsRepository, new HostLevelStoreIngestionStatsSupplier(serverConfig, ingestionTaskMap, time), metadataRepository, - unregisterMetricForDeletedStoreEnabled); + unregisterMetricForDeletedStoreEnabled, + false); } static class HostLevelStoreIngestionStatsSupplier implements StatsSupplier { diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java index 8eaab3e6f4..e2ff348e7c 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java +++ 
b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java @@ -28,7 +28,8 @@ public AggKafkaConsumerServiceStats( metricsRepository, new KafkaConsumerServiceStatsSupplier(getMaxElapsedTimeSinceLastPollInConsumerPool), metadataRepository, - isUnregisterMetricForDeletedStoreEnabled); + isUnregisterMetricForDeletedStoreEnabled, + true); } public void recordTotalConsumerIdleTime(double idleTime) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java index 37c183ae78..dcf7dd0dbc 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java @@ -15,39 +15,18 @@ public abstract class AbstractVeniceAggStats { private final MetricsRepository metricsRepository; private String clusterName = null; - private AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier, T totalStats) { - this.metricsRepository = metricsRepository; - this.statsFactory = statsSupplier; - this.totalStats = totalStats; - } - private AbstractVeniceAggStats( - VeniceMetricsRepository metricsRepository, - StatsSupplier statsSupplier, String clusterName, + MetricsRepository metricsRepository, + StatsSupplier statsSupplier, T totalStats) { + this.clusterName = clusterName; this.metricsRepository = metricsRepository; this.statsFactory = statsSupplier; - this.clusterName = clusterName; this.totalStats = totalStats; } - public AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier) { - this(metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null, null)); - } - - public AbstractVeniceAggStats( - VeniceMetricsRepository metricsRepository, - StatsSupplier 
statsSupplier, - String clusterName) { - this( - metricsRepository, - statsSupplier, - clusterName, - statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null)); - } - - public AbstractVeniceAggStats(MetricsRepository metricsRepository, String clusterName) { + public AbstractVeniceAggStats(String clusterName, MetricsRepository metricsRepository) { this.metricsRepository = metricsRepository; this.clusterName = clusterName; } @@ -58,14 +37,19 @@ public void setStatsSupplier(StatsSupplier statsSupplier) { } public AbstractVeniceAggStats( + String clusterName, MetricsRepository metricsRepository, StatsSupplier statsSupplier, - String clusterName) { + boolean perClusterAggregate) { this( + clusterName, metricsRepository, statsSupplier, - statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT + "." + clusterName, clusterName, null)); - this.clusterName = clusterName; + statsSupplier.get( + metricsRepository, + perClusterAggregate ? STORE_NAME_FOR_TOTAL_STAT + "." 
+ clusterName : STORE_NAME_FOR_TOTAL_STAT, + clusterName, + null)); } public T getStoreStats(String storeName) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java index cab90074bf..c00edc95ec 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java @@ -81,7 +81,9 @@ public Set getDimensionsList() { * create the metric */ public void createMetric(VeniceOpenTelemetryMetricsRepository otelRepository) { - otelRepository.createInstrument(this); + if (otelRepository != null) { + otelRepository.createInstrument(this); + } } /** diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java index afca2893ac..2d274f0c9d 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java @@ -22,32 +22,33 @@ public static MetricsRepository createSingleThreadedMetricsRepository() { return createSingleThreadedMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } - public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() { + public static MetricsRepository createSingleThreadedVeniceMetricsRepository() { return createSingleThreadedVeniceMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } - public static MetricsRepository createSingleThreadedMetricsRepository( + public static MetricConfig getMetricConfig( long maxMetricsMeasurementTimeoutMs, long initialMetricsMeasurementTimeoutMs) { - return new MetricsRepository( - new 
MetricConfig( - new AsyncGauge.AsyncGaugeExecutor.Builder().setMetricMeasurementThreadCount(1) - .setSlowMetricMeasurementThreadCount(1) - .setInitialMetricsMeasurementTimeoutInMs(initialMetricsMeasurementTimeoutMs) - .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) - .build())); - } - - public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository( - long maxMetricsMeasurementTimeoutMs, - long initialMetricsMeasurementTimeoutMs) { - MetricConfig tehutiMetricsConfig = new MetricConfig( + return new MetricConfig( new AsyncGauge.AsyncGaugeExecutor.Builder().setMetricMeasurementThreadCount(1) .setSlowMetricMeasurementThreadCount(1) .setInitialMetricsMeasurementTimeoutInMs(initialMetricsMeasurementTimeoutMs) .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) .build()); + } + + public static MetricsRepository createSingleThreadedMetricsRepository( + long maxMetricsMeasurementTimeoutMs, + long initialMetricsMeasurementTimeoutMs) { + return new MetricsRepository(getMetricConfig(maxMetricsMeasurementTimeoutMs, initialMetricsMeasurementTimeoutMs)); + } + + public static MetricsRepository createSingleThreadedVeniceMetricsRepository( + long maxMetricsMeasurementTimeoutMs, + long initialMetricsMeasurementTimeoutMs) { return new VeniceMetricsRepository( - new VeniceMetricsConfig.Builder().setTehutiMetricConfig(tehutiMetricsConfig).build()); + new VeniceMetricsConfig.Builder() + .setTehutiMetricConfig(getMetricConfig(maxMetricsMeasurementTimeoutMs, initialMetricsMeasurementTimeoutMs)) + .build()); } } diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushHealthStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushHealthStats.java index ebf508d83f..7d7035f93c 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushHealthStats.java +++ 
b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushHealthStats.java @@ -16,7 +16,8 @@ public AggPushHealthStats( metricsRepository, PushHealthStats::new, metadataRepository, - isUnregisterMetricForDeletedStoreEnabled); + isUnregisterMetricForDeletedStoreEnabled, + true); } public void recordFailedPush(String storeName, long durationInSec) { diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushStatusCleanUpStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushStatusCleanUpStats.java index 6613c1c16a..c7e508ee74 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushStatusCleanUpStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/pushmonitor/AggPushStatusCleanUpStats.java @@ -16,7 +16,8 @@ public AggPushStatusCleanUpStats( metricsRepository, PushStatusCleanUpStats::new, metadataRepository, - isUnregisterMetricForDeletedStoreEnabled); + isUnregisterMetricForDeletedStoreEnabled, + true); } public void recordLeakedPushStatusCount(int count) { diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java index 2a7662a094..ed23d6e9fd 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java @@ -20,28 +20,19 @@ public AbstractVeniceAggStoreStats( MetricsRepository metricsRepository, StatsSupplier statsSupplier, ReadOnlyStoreRepository metadataRepository, - boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository, statsSupplier, clusterName); - this.isUnregisterMetricForDeletedStoreEnabled = isUnregisterMetricForDeletedStoreEnabled; - registerStoreDataChangedListenerIfRequired(metadataRepository); - } - - 
public AbstractVeniceAggStoreStats( - MetricsRepository metricsRepository, - StatsSupplier statsSupplier, - ReadOnlyStoreRepository metadataRepository, - boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository, statsSupplier); + boolean isUnregisterMetricForDeletedStoreEnabled, + boolean perClusterAggregate) { + super(clusterName, metricsRepository, statsSupplier, perClusterAggregate); this.isUnregisterMetricForDeletedStoreEnabled = isUnregisterMetricForDeletedStoreEnabled; registerStoreDataChangedListenerIfRequired(metadataRepository); } public AbstractVeniceAggStoreStats( - MetricsRepository metricsRepository, String clusterName, + MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository, clusterName); + super(clusterName, metricsRepository); this.isUnregisterMetricForDeletedStoreEnabled = isUnregisterMetricForDeletedStoreEnabled; registerStoreDataChangedListenerIfRequired(metadataRepository); } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java index d95accd2d2..a561cb58d9 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java @@ -274,6 +274,7 @@ public void testCompressingRecord(boolean compressionMetricCollectionEnabled, bo validator, new UpdateStoreQueryParams().setCompressionStrategy(CompressionStrategy.GZIP)); + Thread.sleep(150000); // Re-push with Kafka Input testRepush(storeName, validator); } diff --git a/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java b/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java 
index de07ad910c..18b8791b46 100644 --- a/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java +++ b/services/venice-controller/src/main/java/com/linkedin/venice/controller/stats/AggPartitionHealthStats.java @@ -37,7 +37,7 @@ protected AggPartitionHealthStats( String clusterName, ReadOnlyStoreRepository storeRepository, PushMonitor pushMonitor) { - super(null, (metricRepo, resourceName, cluster) -> new PartitionHealthStats(resourceName), clusterName); + super(clusterName, null, (metricRepo, resourceName, cluster) -> new PartitionHealthStats(resourceName), true); this.storeRepository = storeRepository; this.pushMonitor = pushMonitor; } @@ -48,7 +48,7 @@ public AggPartitionHealthStats( RoutingDataRepository routingDataRepository, ReadOnlyStoreRepository storeRepository, PushMonitor pushMonitor) { - super(metricsRepository, PartitionHealthStats::new, clusterName); + super(clusterName, metricsRepository, PartitionHealthStats::new, true); this.storeRepository = storeRepository; this.pushMonitor = pushMonitor; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index 35d6c68bb7..b70823afc4 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -137,7 +137,7 @@ public class RouterServer extends AbstractVeniceService { private static final String ROUTER_RETRY_MANAGER_THREAD_PREFIX = "Router-retry-manager-thread"; // Immutable state private final List serviceDiscoveryAnnouncers; - private final VeniceMetricsRepository metricsRepository; + private final MetricsRepository metricsRepository; private final Optional sslFactory; private final Optional accessController; @@ -291,7 +291,7 @@ public RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, 
Optional sslFactory, - VeniceMetricsRepository metricsRepository) { + MetricsRepository metricsRepository) { this( properties, serviceDiscoveryAnnouncers, @@ -307,7 +307,7 @@ public RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, Optional sslFactory, - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, D2Client d2Client, String d2ServiceName) { this(properties, serviceDiscoveryAnnouncers, accessController, sslFactory, metricsRepository, true); @@ -326,8 +326,8 @@ public RouterServer( config.getClusterName()); this.routerStats = new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - metricsRepository, config.getClusterName(), + metricsRepository, requestType, config.isKeyValueProfilingEnabled(), metadataRepository, @@ -371,7 +371,7 @@ private RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, Optional sslFactory, - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, boolean isCreateHelixManager) { config = new VeniceRouterConfig(properties); zkClient = @@ -386,7 +386,7 @@ private RouterServer( this.metaStoreShadowReader = Optional.empty(); this.metricsRepository = metricsRepository; - this.aggHostHealthStats = new AggHostHealthStats(metricsRepository, config.getClusterName()); + this.aggHostHealthStats = new AggHostHealthStats(config.getClusterName(), metricsRepository); this.serviceDiscoveryAnnouncers = serviceDiscoveryAnnouncers; this.accessController = accessController; @@ -429,8 +429,8 @@ public RouterServer( this.metadataRepository = metadataRepository; this.routerStats = new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - metricsRepository, config.getClusterName(), + metricsRepository, requestType, config.isKeyValueProfilingEnabled(), metadataRepository, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java 
b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java index 0a3ccefd44..cf75f003e4 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java @@ -30,7 +30,6 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.streaming.VeniceChunkedResponse; import com.linkedin.venice.router.throttle.PendingRequestThrottler; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.LatencyUtils; import com.linkedin.venice.utils.Pair; import com.linkedin.venice.utils.Utils; @@ -41,6 +40,7 @@ import io.netty.handler.codec.http.HttpHeaderNames; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.HttpVersion; +import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.util.Collections; import java.util.List; @@ -95,7 +95,7 @@ public VeniceDispatcher( VeniceRouterConfig config, ReadOnlyStoreRepository storeRepository, RouterStats perStoreStatsByType, - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, StorageNodeClient storageNodeClient, RouteHttpRequestStats routeHttpRequestStats, AggHostHealthStats aggHostHealthStats, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java index 199b5a548a..5f1e81b68f 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java @@ -34,11 +34,11 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.streaming.VeniceChunkedWriteHandler; import com.linkedin.venice.router.utils.VeniceRouterUtils; -import 
com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.streaming.StreamingUtils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.channel.ChannelHandlerContext; import io.netty.handler.codec.http.HttpResponseStatus; +import io.tehuti.metrics.MetricsRepository; import java.util.Collection; import java.util.Collections; import java.util.Map; @@ -114,7 +114,7 @@ public class VenicePathParser private final ReadOnlyStoreRepository storeRepository; private final VeniceRouterConfig routerConfig; private final CompressorFactory compressorFactory; - private final VeniceMetricsRepository metricsRepository; + private final MetricsRepository metricsRepository; private final ScheduledExecutorService retryManagerScheduler; private final Map routerSingleKeyRetryManagers; private final Map routerMultiKeyRetryManagers; @@ -134,7 +134,7 @@ public VenicePathParser( ReadOnlyStoreRepository storeRepository, VeniceRouterConfig routerConfig, CompressorFactory compressorFactory, - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, ScheduledExecutorService retryManagerScheduler) { this.versionFinder = versionFinder; this.partitionFinder = partitionFinder; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java index a40e5c0e6e..e8670193a0 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java @@ -19,9 +19,9 @@ import com.linkedin.venice.router.stats.RouterCurrentVersionStats; import com.linkedin.venice.router.stats.StaleVersionReason; import com.linkedin.venice.router.stats.StaleVersionStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.RedundantExceptionFilter; 
import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.tehuti.metrics.MetricsRepository; import java.util.List; import java.util.Map; import java.util.Optional; @@ -51,7 +51,7 @@ public class VeniceVersionFinder { private final HelixBaseRoutingRepository routingDataRepository; private final CompressorFactory compressorFactory; - private final VeniceMetricsRepository metricsRepository; + private final MetricsRepository metricsRepository; public VeniceVersionFinder( ReadOnlyStoreRepository metadataRepository, @@ -61,7 +61,7 @@ public VeniceVersionFinder( Map clusterToD2Map, String clusterName, CompressorFactory compressorFactory, - VeniceMetricsRepository metricsRepository) { + MetricsRepository metricsRepository) { this.metadataRepository = metadataRepository; this.routingDataRepository = routingDataRepository; this.stats = stats; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java index 452d3f7299..c36023b87b 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java @@ -4,7 +4,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.helix.HelixInstanceConfigRepository; import com.linkedin.venice.router.stats.HelixGroupStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import java.util.concurrent.TimeUnit; @@ -26,7 +26,7 @@ public class HelixGroupSelector implements HelixGroupSelectionStrategy { private final HelixGroupStats helixGroupStats; public HelixGroupSelector( - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, HelixInstanceConfigRepository instanceConfigRepository, 
HelixGroupSelectionStrategyEnum strategyEnum, TimeoutProcessor timeoutProcessor) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java b/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java index 40479dd457..c6f1395c0f 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java @@ -14,12 +14,12 @@ import com.linkedin.venice.service.AbstractVeniceService; import com.linkedin.venice.stats.DnsLookupStats; import com.linkedin.venice.stats.HttpConnectionPoolStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.DaemonThreadFactory; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; +import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -83,7 +83,7 @@ public class ApacheHttpAsyncStorageNodeClient implements StorageNodeClient { public ApacheHttpAsyncStorageNodeClient( VeniceRouterConfig config, Optional sslFactory, - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, LiveInstanceMonitor monitor) { int totalIOThreadNum = config.getIoThreadCountInPoolMode(); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java index 675177df66..aeeed721e3 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java +++ 
b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java @@ -2,7 +2,7 @@ import com.linkedin.venice.router.VeniceRouterConfig; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Count; @@ -12,7 +12,7 @@ public class AdminOperationsStats extends AbstractVeniceStats { private final Sensor adminRequestSensor; private final Sensor errorAdminRequestSensor; - public AdminOperationsStats(VeniceMetricsRepository metricsRepository, String name, VeniceRouterConfig config) { + public AdminOperationsStats(MetricsRepository metricsRepository, String name, VeniceRouterConfig config) { super(metricsRepository, name); adminRequestSensor = registerSensorIfAbsent("admin_request", new Count()); errorAdminRequestSensor = registerSensorIfAbsent("error_admin_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java index 230f12f005..0343f2d0d1 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java @@ -2,17 +2,16 @@ import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.StatsUtils; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; public class AggHostHealthStats extends AbstractVeniceAggStats { - public AggHostHealthStats(VeniceMetricsRepository metricsRepository, String clusterName) { + public AggHostHealthStats(String clusterName, MetricsRepository metricsRepository) { super( + clusterName, metricsRepository, - (repo, hostName, 
cluster) -> new HostHealthStats( - (VeniceMetricsRepository) repo, - StatsUtils.convertHostnameToMetricName(hostName)), - clusterName); + (repo, hostName, cluster) -> new HostHealthStats(repo, StatsUtils.convertHostnameToMetricName(hostName)), + false); } private HostHealthStats getHostStats(String hostName) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java index 38d1321a32..7f59b12b2b 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java @@ -5,9 +5,9 @@ import com.linkedin.venice.read.RequestType; import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.handler.codec.http.HttpResponseStatus; +import io.tehuti.metrics.MetricsRepository; import java.util.Map; import java.util.function.Function; @@ -16,14 +16,14 @@ public class AggRouterHttpRequestStats extends AbstractVeniceAggStoreStats scatterGatherStatsMap = new VeniceConcurrentHashMap<>(); public AggRouterHttpRequestStats( - VeniceMetricsRepository metricsRepository, String clusterName, + MetricsRepository metricsRepository, RequestType requestType, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { this( - metricsRepository, clusterName, + metricsRepository, requestType, false, metadataRepository, @@ -31,13 +31,13 @@ public AggRouterHttpRequestStats( } public AggRouterHttpRequestStats( - VeniceMetricsRepository metricsRepository, String cluster, + MetricsRepository metricsRepository, RequestType requestType, boolean 
isKeyValueProfilingEnabled, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository, cluster, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); + super(cluster, metricsRepository, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); /** * Use a setter function to bypass the restriction that the supertype constructor could not * touch member fields of current object. @@ -51,7 +51,7 @@ public AggRouterHttpRequestStats( } return new RouterHttpRequestStats( - (VeniceMetricsRepository) metricsRepo, + metricsRepo, storeName, clusterName, requestType, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java index 55d95dff70..53a269c74b 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java @@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; @@ -10,7 +10,7 @@ public class HealthCheckStats extends AbstractVeniceStats { private final Sensor healthCheckRequestSensor; private final Sensor errorHealthCheckRequestSensor; - public HealthCheckStats(VeniceMetricsRepository metricsRepository, String name) { + public HealthCheckStats(MetricsRepository metricsRepository, String name) { super(metricsRepository, name); healthCheckRequestSensor = registerSensor("healthcheck_request", new Count()); errorHealthCheckRequestSensor = registerSensor("error_healthcheck_request", new Count()); diff --git 
a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java index 70e7e7f8ec..a248de31cf 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java @@ -2,8 +2,8 @@ import com.linkedin.venice.router.api.routing.helix.HelixGroupSelectionStrategy; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Avg; @@ -19,7 +19,7 @@ public class HelixGroupStats extends AbstractVeniceStats { private final Sensor minGroupPendingRequest; private final Sensor avgGroupPendingRequest; - public HelixGroupStats(VeniceMetricsRepository metricsRepository, HelixGroupSelectionStrategy strategy) { + public HelixGroupStats(MetricsRepository metricsRepository, HelixGroupSelectionStrategy strategy) { super(metricsRepository, "HelixGroupStats"); this.strategy = strategy; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java index 8d8eafe4e6..7b9d1b203b 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java @@ -2,7 +2,7 @@ import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import 
io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Count; @@ -28,7 +28,7 @@ public class HostHealthStats extends AbstractVeniceStats { private Optional unhealthyHostCountCausedByPendingQueueSensor = Optional.empty(); private Optional unhealthyHostCountCausedByHeartBeatSensor = Optional.empty(); - public HostHealthStats(VeniceMetricsRepository metricsRepository, String name) { + public HostHealthStats(MetricsRepository metricsRepository, String name) { super(metricsRepository, name); this.unhealthyHostOfflineInstance = registerSensor("unhealthy_host_offline_instance", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java index 6c999fd1df..e203fc4d89 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java @@ -3,8 +3,8 @@ import com.linkedin.venice.router.httpclient.StorageNodeClient; import com.linkedin.venice.stats.AbstractVeniceStats; import com.linkedin.venice.stats.StatsUtils; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Max; @@ -20,11 +20,11 @@ * to {@link RouteHttpStats} which stores only per type stats. 
*/ public class RouteHttpRequestStats { - private final VeniceMetricsRepository metricsRepository; + private final MetricsRepository metricsRepository; private final StorageNodeClient storageNodeClient; private final Map routeStatsMap = new VeniceConcurrentHashMap<>(); - public RouteHttpRequestStats(VeniceMetricsRepository metricsRepository, StorageNodeClient storageNodeClient) { + public RouteHttpRequestStats(MetricsRepository metricsRepository, StorageNodeClient storageNodeClient) { this.metricsRepository = metricsRepository; this.storageNodeClient = storageNodeClient; } @@ -58,7 +58,7 @@ static class InternalHostStats extends AbstractVeniceStats { private final Sensor unhealthyPendingRateSensor; private AtomicLong pendingRequestCount; - public InternalHostStats(VeniceMetricsRepository metricsRepository, String hostName) { + public InternalHostStats(MetricsRepository metricsRepository, String hostName) { super(metricsRepository, StatsUtils.convertHostnameToMetricName(hostName)); pendingRequestCount = new AtomicLong(); // pendingRequestCountSensor = diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java index 79de9c6892..90bc9f94c4 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java @@ -3,8 +3,8 @@ import com.linkedin.venice.read.RequestType; import com.linkedin.venice.stats.AbstractVeniceHttpStats; import com.linkedin.venice.stats.TehutiUtils; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; import io.tehuti.metrics.stats.Max; @@ -13,10 +13,10 @@ public class RouteHttpStats { private final 
Map routeStatsMap = new VeniceConcurrentHashMap<>(); - private final VeniceMetricsRepository metricsRepository; + private final MetricsRepository metricsRepository; private final RequestType requestType; - public RouteHttpStats(VeniceMetricsRepository metricsRepository, RequestType requestType) { + public RouteHttpStats(MetricsRepository metricsRepository, RequestType requestType) { this.metricsRepository = metricsRepository; this.requestType = requestType; } @@ -31,7 +31,7 @@ static class InternalRouteHttpStats extends AbstractVeniceHttpStats { private final Sensor responseWaitingTimeSensor; private final Sensor requestSensor; - public InternalRouteHttpStats(VeniceMetricsRepository metricsRepository, String hostName, RequestType requestType) { + public InternalRouteHttpStats(MetricsRepository metricsRepository, String hostName, RequestType requestType) { super(metricsRepository, hostName.replace('.', '_'), requestType); requestSensor = registerSensor("request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java index 57965d6b06..35dd99e619 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java @@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Gauge; @@ -9,7 +9,7 @@ public class RouterCurrentVersionStats extends AbstractVeniceStats { private final Sensor currentVersionNumberSensor; - public RouterCurrentVersionStats(VeniceMetricsRepository metricsRepository, String name) { + public 
RouterCurrentVersionStats(MetricsRepository metricsRepository, String name) { super(metricsRepository, name); this.currentVersionNumberSensor = registerSensor("current_version", new Gauge(-1)); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 499fdba229..6079a7b3e8 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -43,6 +43,7 @@ import io.tehuti.Metric; import io.tehuti.metrics.MeasurableStat; import io.tehuti.metrics.MetricConfig; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Count; @@ -137,21 +138,34 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { // QPS metrics public RouterHttpRequestStats( - VeniceMetricsRepository metricsRepository, + MetricsRepository metricsRepository, String storeName, String clusterName, RequestType requestType, ScatterGatherStats scatterGatherStats, boolean isKeyValueProfilingEnabled) { super(metricsRepository, storeName, requestType); - emitOpenTelemetryMetrics = metricsRepository.getVeniceMetricsConfig().emitOtelMetrics(); - openTelemetryMetricFormat = metricsRepository.getVeniceMetricsConfig().getMetricNamingFormat(); - VeniceOpenTelemetryMetricsRepository otelRepository = metricsRepository.getOpenTelemetryMetricsRepository(); - commonMetricDimensions = Attributes.builder() - .put(getDimensionName(VENICE_STORE_NAME), storeName) - .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.name().toLowerCase()) - .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName) - .build(); + VeniceOpenTelemetryMetricsRepository otelRepository; + if (metricsRepository instanceof 
VeniceMetricsRepository) { + VeniceMetricsRepository veniceMetricsRepository = (VeniceMetricsRepository) metricsRepository; + emitOpenTelemetryMetrics = (veniceMetricsRepository != null) + ? veniceMetricsRepository.getVeniceMetricsConfig().emitOtelMetrics() + : false; + openTelemetryMetricFormat = (veniceMetricsRepository != null) + ? veniceMetricsRepository.getVeniceMetricsConfig().getMetricNamingFormat() + : VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; + otelRepository = veniceMetricsRepository.getOpenTelemetryMetricsRepository(); + commonMetricDimensions = Attributes.builder() + .put(getDimensionName(VENICE_STORE_NAME), storeName) + .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.name().toLowerCase()) + .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName) + .build(); + } else { + emitOpenTelemetryMetrics = false; + openTelemetryMetricFormat = VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; + commonMetricDimensions = null; + otelRepository = null; + } this.systemStoreName = VeniceSystemStoreUtils.extractSystemStoreType(storeName); Rate requestRate = new OccurrenceRate(); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java index 42fbbd4b74..028a5c728a 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java @@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; @@ -9,7 +9,7 @@ public class RouterThrottleStats extends AbstractVeniceStats { private final Sensor routerThrottleSensor; - public 
RouterThrottleStats(VeniceMetricsRepository repository, String name) { + public RouterThrottleStats(MetricsRepository repository, String name) { super(repository, name); routerThrottleSensor = registerSensor("router_throttled_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java index 14347ccc5d..596a6564b0 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java @@ -2,7 +2,7 @@ import com.linkedin.alpini.netty4.ssl.SslInitializer; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Avg; @@ -19,7 +19,7 @@ public class SecurityStats extends AbstractVeniceStats { private final Sensor sslLiveConnectionCount; private final Sensor nonSslConnectionCount; - public SecurityStats(VeniceMetricsRepository repository, String name, IntSupplier secureConnectionCountSupplier) { + public SecurityStats(MetricsRepository repository, String name, IntSupplier secureConnectionCountSupplier) { super(repository, name); this.secureConnectionCountSupplier = secureConnectionCountSupplier; this.sslErrorCount = registerSensor("ssl_error", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java index 693de6fa14..21ce6805c0 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java @@ -1,8 
+1,8 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Max; import io.tehuti.metrics.stats.OccurrenceRate; @@ -13,7 +13,7 @@ public class StaleVersionStats extends AbstractVeniceStats { private final VeniceConcurrentHashMap staleVersionReasonStats = new VeniceConcurrentHashMap<>(); - public StaleVersionStats(VeniceMetricsRepository metricsRepository, String name) { + public StaleVersionStats(MetricsRepository metricsRepository, String name) { super(metricsRepository, name); staleVersionStat = registerSensor("stale_version_delta", new Max()); for (StaleVersionReason reason: StaleVersionReason.values()) { diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java index 2af917f1bd..333b37ca33 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java @@ -29,8 +29,8 @@ public void setUp() { @Test public void testAggRouterMetrics() { AggRouterHttpRequestStats stats = new AggRouterHttpRequestStats( - metricsRepository, "test-cluster", + metricsRepository, RequestType.SINGLE_GET, storeMetadataRepository, true); @@ -66,8 +66,8 @@ public void testAggRouterMetrics() { @Test public void testProfilingMetrics() { AggRouterHttpRequestStats stats = new AggRouterHttpRequestStats( - metricsRepository, "test-cluster", + metricsRepository, RequestType.COMPUTE, true, storeMetadataRepository, diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java index fdfa96bb08..36d7baeadc 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java @@ -7,9 +7,9 @@ import com.linkedin.venice.router.httpclient.StorageNodeClient; import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterHttpRequestStats; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; +import io.tehuti.metrics.MetricsRepository; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; @@ -22,7 +22,7 @@ public class RouteHttpRequestStatsTest { @BeforeSuite public void setUp() { - VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); + MetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); reporter = new MockTehutiReporter(); metrics.addReporter(reporter); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java index 01377fc91b..3669622691 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java @@ -225,8 +225,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), "test-cluster", + new VeniceMetricsRepository(), requestType, mock(ReadOnlyStoreRepository.class), true))); @@ -379,8 +379,8 
@@ public void testLeastLoadedOnSlowHosts() throws RouterException { config, new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), "test-cluster", + new VeniceMetricsRepository(), requestType, mock(ReadOnlyStoreRepository.class), true)), diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java index 2ab4e77908..f6d497a18b 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java @@ -100,8 +100,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), CLUSTER, + new VeniceMetricsRepository(), requestType, mock(ReadOnlyStoreRepository.class), true))); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java index 02e6fb1bdb..11f5b0d54b 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java @@ -47,8 +47,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new VeniceMetricsRepository(), "test-cluster", + new VeniceMetricsRepository(), requestType, mock(ReadOnlyStoreRepository.class), true))); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java index 
35364da6b1..f420e0bf25 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java @@ -3,9 +3,9 @@ import static org.mockito.Mockito.*; import com.linkedin.venice.router.VeniceRouterConfig; -import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; +import io.tehuti.metrics.MetricsRepository; import org.testng.Assert; import org.testng.annotations.Test; @@ -13,7 +13,7 @@ public class AdminOperationsStatsTest { @Test public void testAdminOperationsStats() { - VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); + MetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); MockTehutiReporter reporter = new MockTehutiReporter(); metrics.addReporter(reporter); VeniceRouterConfig mockConfig = mock(VeniceRouterConfig.class); diff --git a/services/venice-server/src/main/java/com/linkedin/venice/listener/HttpChannelInitializer.java b/services/venice-server/src/main/java/com/linkedin/venice/listener/HttpChannelInitializer.java index 50aa434264..fdca044c42 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/listener/HttpChannelInitializer.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/listener/HttpChannelInitializer.java @@ -83,6 +83,7 @@ public HttpChannelInitializer( boolean isUnregisterMetricForDeletedStoreEnabled = serverConfig.isUnregisterMetricForDeletedStoreEnabled(); this.singleGetStats = new AggServerHttpRequestStats( + serverConfig.getClusterName(), metricsRepository, RequestType.SINGLE_GET, isKeyValueProfilingEnabled, @@ -90,6 +91,7 @@ public HttpChannelInitializer( isUnregisterMetricForDeletedStoreEnabled, isDaVinciClient); this.multiGetStats = new 
AggServerHttpRequestStats( + serverConfig.getClusterName(), metricsRepository, RequestType.MULTI_GET, isKeyValueProfilingEnabled, @@ -97,6 +99,7 @@ public HttpChannelInitializer( isUnregisterMetricForDeletedStoreEnabled, isDaVinciClient); this.computeStats = new AggServerHttpRequestStats( + serverConfig.getClusterName(), metricsRepository, RequestType.COMPUTE, isKeyValueProfilingEnabled, @@ -127,7 +130,7 @@ public HttpChannelInitializer( if (serverConfig.isQuotaEnforcementEnabled()) { String nodeId = Utils.getHelixNodeIdentifier(serverConfig.getListenerHostname(), serverConfig.getListenerPort()); - this.quotaUsageStats = new AggServerQuotaUsageStats(metricsRepository); + this.quotaUsageStats = new AggServerQuotaUsageStats(serverConfig.getClusterName(), metricsRepository); this.quotaEnforcer = new ReadQuotaEnforcementHandler( serverConfig, storeMetadataRepository, diff --git a/services/venice-server/src/main/java/com/linkedin/venice/server/VeniceServer.java b/services/venice-server/src/main/java/com/linkedin/venice/server/VeniceServer.java index 3175f1d75f..d60cce4bdb 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/server/VeniceServer.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/server/VeniceServer.java @@ -326,7 +326,8 @@ private List createServices() { services.add(storageService); // Create stats for RocksDB - storageService.getRocksDBAggregatedStatistics().ifPresent(stat -> new AggRocksDBStats(metricsRepository, stat)); + storageService.getRocksDBAggregatedStatistics() + .ifPresent(stat -> new AggRocksDBStats(serverConfig.getClusterName(), metricsRepository, stat)); compressorFactory = new StorageEngineBackedCompressorFactory(storageMetadataService); diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java index e2187ac785..a75303e2ca 100644 --- 
a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggRocksDBStats.java @@ -8,8 +8,12 @@ * Right now, Venice SN only reports aggregated metrics for RocksDB. */ public class AggRocksDBStats extends AbstractVeniceAggStats { - public AggRocksDBStats(MetricsRepository metricsRepository, Statistics aggStat) { - super(metricsRepository, (metricsRepo, storeName, clusterName) -> new RocksDBStats(metricsRepository, storeName)); + public AggRocksDBStats(String cluster, MetricsRepository metricsRepository, Statistics aggStat) { + super( + cluster, + metricsRepository, + (metricsRepo, storeName, clusterName) -> new RocksDBStats(metricsRepository, storeName), + false); totalStats.setRocksDBStat(aggStat); } } diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java index c11ba2c2fd..d3eadfb5ca 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java @@ -12,6 +12,7 @@ */ public class AggServerHttpRequestStats extends AbstractVeniceAggStoreStats { public AggServerHttpRequestStats( + String clusterName, MetricsRepository metricsRepository, RequestType requestType, boolean isKeyValueProfilingEnabled, @@ -19,10 +20,12 @@ public AggServerHttpRequestStats( boolean unregisterMetricForDeletedStoreEnabled, boolean isDaVinciClient) { super( + clusterName, metricsRepository, new ServerHttpRequestStatsSupplier(requestType, isKeyValueProfilingEnabled, isDaVinciClient), metadataRepository, - unregisterMetricForDeletedStoreEnabled); + unregisterMetricForDeletedStoreEnabled, + false); } static class ServerHttpRequestStatsSupplier implements StatsSupplier { diff --git 
a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java index 568dc555fa..6f86a954b7 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerQuotaUsageStats.java @@ -10,8 +10,12 @@ public class AggServerQuotaUsageStats extends AbstractVeniceAggStats { private static final int SINGLE_VERSION_FOR_TOTAL_STATS = 1; - public AggServerQuotaUsageStats(MetricsRepository metricsRepository) { - super(metricsRepository, (metrics, storeName, clusterName) -> new ServerReadQuotaUsageStats(metrics, storeName)); + public AggServerQuotaUsageStats(String cluster, MetricsRepository metricsRepository) { + super( + cluster, + metricsRepository, + (metrics, storeName, clusterName) -> new ServerReadQuotaUsageStats(metrics, storeName), + false); totalStats.setCurrentVersion(SINGLE_VERSION_FOR_TOTAL_STATS); } diff --git a/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerHttpRequestStatsTest.java b/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerHttpRequestStatsTest.java index b1922b5cb2..d6993e186b 100644 --- a/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerHttpRequestStatsTest.java +++ b/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerHttpRequestStatsTest.java @@ -30,6 +30,7 @@ public void setUp() { this.reporter = new MockTehutiReporter(); this.metricsRepository.addReporter(reporter); this.singleGetStats = new AggServerHttpRequestStats( + "test_cluster", metricsRepository, RequestType.SINGLE_GET, false, @@ -37,6 +38,7 @@ public void setUp() { true, false); this.batchGetStats = new AggServerHttpRequestStats( + "test_cluster", metricsRepository, RequestType.MULTI_GET, false, diff --git 
a/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerReadQuotaUsageStatsTest.java b/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerReadQuotaUsageStatsTest.java index 8b80452587..3745e200eb 100644 --- a/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerReadQuotaUsageStatsTest.java +++ b/services/venice-server/src/test/java/com/linkedin/venice/stats/AggServerReadQuotaUsageStatsTest.java @@ -16,7 +16,7 @@ public void testAggServerQuotaUsageStats() { long start = System.currentTimeMillis(); doReturn(start).when(mockTime).milliseconds(); MetricsRepository metricsRepository = new MetricsRepository(); - AggServerQuotaUsageStats aggServerQuotaUsageStats = new AggServerQuotaUsageStats(metricsRepository); + AggServerQuotaUsageStats aggServerQuotaUsageStats = new AggServerQuotaUsageStats("test_cluster", metricsRepository); String storeName = "testStore"; String storeName2 = "testStore2"; String currentReadQuotaRequestedQPSString = "." + storeName + "--current_quota_request.Gauge"; From e2ff0b04ee9077c0912542fa7b67e0206968cc3c Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Fri, 22 Nov 2024 04:36:33 -0800 Subject: [PATCH 09/19] 1. allow custom dimension map for passing in custom/system dimensions 2. rename HISTOGRAM_WITHOUT_BUCKETS 3. remove modifying input argument in createInstrument 4. 
removed AllMetricEntities.java and moved RouterMetricEntities.java to router sub module --- .../linkedin/venice/stats/TehutiUtils.java | 14 ---- .../venice/stats/VeniceMetricsConfig.java | 67 ++++++++++++++++++- .../venice/stats/VeniceMetricsRepository.java | 21 +++++- .../VeniceOpenTelemetryMetricsRepository.java | 51 ++++++-------- .../stats/metrics/AllMetricEntities.java | 20 ------ .../venice/stats/metrics/MetricEntity.java | 4 +- .../venice/stats/metrics/MetricType.java | 11 +-- .../venice/stats/VeniceMetricsConfigTest.java | 28 ++++++++ .../utils/VeniceRouterWrapper.java | 13 ++-- .../linkedin/venice/router/RouterServer.java | 25 ++++--- .../router/stats/RouterHttpRequestStats.java | 32 +++++---- .../router/stats}/RouterMetricEntities.java | 16 ++--- 12 files changed, 194 insertions(+), 108 deletions(-) delete mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java rename {internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules => services/venice-router/src/main/java/com/linkedin/venice/router/stats}/RouterMetricEntities.java (88%) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java index 513d60a63d..7a5cf212cd 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java @@ -9,7 +9,6 @@ import io.tehuti.metrics.stats.Percentiles; import io.tehuti.metrics.stats.Rate; import java.util.Arrays; -import java.util.Map; /** @@ -130,19 +129,6 @@ public static MetricsRepository getMetricsRepository(String serviceName) { return metricsRepository; } - public static VeniceMetricsRepository getVeniceMetricsRepository( - String serviceName, - String metricPrefix, - Map configs) { - VeniceMetricsRepository metricsRepository 
= new VeniceMetricsRepository( - new VeniceMetricsConfig.Builder().setServiceName(serviceName) - .setMetricPrefix(metricPrefix) - .extractAndSetOtelConfigs(configs) - .build()); - metricsRepository.addReporter(new JmxReporter(serviceName)); - return metricsRepository; - } - /** * A valid metric name needs to pass the test in {@link javax.management.ObjectName}. This helper function will * try to fix all invalid character mentioned in the above function to avoid MalformedObjectNameException; besides, diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index 0f73965665..56428ec1e2 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -1,9 +1,11 @@ package com.linkedin.venice.stats; +import com.linkedin.venice.stats.metrics.MetricEntities; import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; import io.opentelemetry.sdk.metrics.export.MetricExporter; import io.tehuti.metrics.MetricConfig; +import java.util.Collection; import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -11,6 +13,13 @@ import org.apache.logging.log4j.Logger; +/** + * Configuration for metrics emitted by Venice: Holds OpenTelemetry as well as Tehuti configs
+ * + * Configs starting with "otel.venice." are Venice-specific configs for OpenTelemetry metrics;
+ * other configs starting with "otel.exporter." are generic OpenTelemetry exporter configs but + * are parsed in this class and applied through setters to configure the OTel exporter. + */ public class VeniceMetricsConfig { private static final Logger LOGGER = LogManager.getLogger(VeniceMetricsConfig.class); @@ -19,6 +28,11 @@ public class VeniceMetricsConfig { */ public static final String OTEL_VENICE_ENABLED = "otel.venice.enabled"; + /** + * Config to set the metric prefix for OpenTelemetry metrics + */ + public static final String OTEL_VENICE_METRIC_PREFIX = "otel.venice.metric.prefix"; + /** * Config to set the naming format for OpenTelemetry metrics * {@link VeniceOpenTelemetryMetricNamingFormat} @@ -37,6 +51,18 @@ public class VeniceMetricsConfig { */ public static final String OTEL_VENICE_EXPORT_TO_ENDPOINT = "otel.venice.export.to.endpoint"; + /** + * Config Map to add custom dimensions to the metrics: Can be used for system dimensions + * amongst other custom dimensions
+ * These will be emitted along with every metric. + * + * + * Custom dimensions are passed as key=value pairs, with each key and value separated by '=' + * Multiple dimensions are separated by ',' + * For example: "custom_dimension_one=value1,custom_dimension_two=value2,custom_dimension_three=value3" + */ + public static final String OTEL_VENICE_CUSTOM_DIMENSIONS_MAP = "otel.venice.custom.dimensions.map"; + /** * Protocol over which the metrics are exported to {@link #OTEL_EXPORTER_OTLP_METRICS_ENDPOINT}
* 1. {@link OtlpConfigUtil#PROTOCOL_HTTP_PROTOBUF} => "http/protobuf"
@@ -82,6 +108,7 @@ public class VeniceMetricsConfig { private final String serviceName; private final String metricPrefix; + private final Collection metricEntities; /** reusing tehuti's MetricConfig */ private final MetricConfig tehutiMetricConfig; @@ -98,6 +125,9 @@ public class VeniceMetricsConfig { private final boolean exportOtelMetricsToEndpoint; private final boolean exportOtelMetricsToLog; + /** Custom dimensions */ + private final Map otelCustomDimensionsMap; + /** * protocol for OpenTelemetry exporter. supports * 1. {@link OtlpConfigUtil#PROTOCOL_HTTP_PROTOBUF} => "http/protobuf" @@ -125,8 +155,10 @@ public class VeniceMetricsConfig { private VeniceMetricsConfig(Builder builder) { this.serviceName = builder.serviceName; this.metricPrefix = builder.metricPrefix; + this.metricEntities = builder.metricEntities; this.emitOTelMetrics = builder.emitOtelMetrics; this.exportOtelMetricsToEndpoint = builder.exportOtelMetricsToEndpoint; + this.otelCustomDimensionsMap = builder.otelCustomDimensionsMap; this.otelExportProtocol = builder.otelExportProtocol; this.otelEndpoint = builder.otelEndpoint; this.otelHeaders = builder.otelHeaders; @@ -142,8 +174,10 @@ private VeniceMetricsConfig(Builder builder) { public static class Builder { private String serviceName = "default_service"; private String metricPrefix = null; + private Collection metricEntities; private boolean emitOtelMetrics = false; private boolean exportOtelMetricsToEndpoint = false; + private Map otelCustomDimensionsMap = new HashMap<>(); private String otelExportProtocol = OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF; private String otelEndpoint = null; Map otelHeaders = new HashMap<>(); @@ -154,7 +188,6 @@ public static class Builder { private boolean useOtelExponentialHistogram = true; private int otelExponentialHistogramMaxScale = 3; private int otelExponentialHistogramMaxBuckets = 250; - private MetricConfig tehutiMetricConfig = null; public Builder setServiceName(String serviceName) { @@ -167,6 +200,11 @@ 
public Builder setMetricPrefix(String metricPrefix) { return this; } + public Builder setMetricEntities(Collection metricEntities) { + this.metricEntities = metricEntities; + return this; + } + public Builder setEmitOtelMetrics(boolean emitOtelMetrics) { this.emitOtelMetrics = emitOtelMetrics; return this; @@ -227,6 +265,10 @@ public Builder extractAndSetOtelConfigs(Map configs) { setEmitOtelMetrics(Boolean.parseBoolean(configValue)); } + if ((configValue = configs.get(OTEL_VENICE_METRIC_PREFIX)) != null) { + setMetricPrefix(configValue); + } + if ((configValue = configs.get(OTEL_VENICE_EXPORT_TO_LOG)) != null) { setExportOtelMetricsToLog(Boolean.parseBoolean(configValue)); } @@ -235,6 +277,21 @@ public Builder extractAndSetOtelConfigs(Map configs) { setExportOtelMetricsToEndpoint(Boolean.parseBoolean(configValue)); } + /** + * custom dimensions are passed as key=value pairs separated by '='
+ * Multiple dimensions are separated by ',' + */ + if ((configValue = configs.get(OTEL_VENICE_CUSTOM_DIMENSIONS_MAP)) != null) { + String[] dimensions = configValue.split(","); + for (String dimension: dimensions) { + String[] keyValue = dimension.split("="); + if (keyValue.length != 2) { + throw new IllegalArgumentException("Invalid custom dimensions: " + configValue); + } + otelCustomDimensionsMap.put(keyValue[0], keyValue[1]); + } + } + if ((configValue = configs.get(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL)) != null) { setOtelExportProtocol(configValue); } @@ -345,6 +402,10 @@ public String getMetricPrefix() { return this.metricPrefix; } + public Collection getMetricEntities() { + return this.metricEntities; + } + public boolean emitOtelMetrics() { return emitOTelMetrics; } @@ -353,6 +414,10 @@ public boolean exportOtelMetricsToEndpoint() { return exportOtelMetricsToEndpoint; } + public Map getOtelCustomDimensionsMap() { + return otelCustomDimensionsMap; + } + public String getOtelExportProtocol() { return otelExportProtocol; } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index 9f6bd02b06..bfd791f390 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -1,7 +1,11 @@ package com.linkedin.venice.stats; +import com.linkedin.venice.stats.metrics.MetricEntities; +import io.tehuti.metrics.JmxReporter; import io.tehuti.metrics.MetricsRepository; import java.io.Closeable; +import java.util.Collection; +import java.util.Map; /** @@ -10,7 +14,7 @@ */ public class VeniceMetricsRepository extends MetricsRepository implements Closeable { private VeniceMetricsConfig veniceMetricsConfig; - VeniceOpenTelemetryMetricsRepository 
openTelemetryMetricsRepository; + private VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; public VeniceMetricsRepository() { super(); @@ -45,4 +49,19 @@ public void close() { openTelemetryMetricsRepository.close(); } } + + public static VeniceMetricsRepository getVeniceMetricsRepository( + String serviceName, + String metricPrefix, + Collection metricEntities, + Map configs) { + VeniceMetricsRepository metricsRepository = new VeniceMetricsRepository( + new VeniceMetricsConfig.Builder().setServiceName(serviceName) + .setMetricPrefix(metricPrefix) + .setMetricEntities(metricEntities) + .extractAndSetOtelConfigs(configs) + .build()); + metricsRepository.addReporter(new JmxReporter(serviceName)); + return metricsRepository; + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 9f395ad1fe..5f69971655 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -4,7 +4,6 @@ import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import com.linkedin.venice.exceptions.VeniceException; -import com.linkedin.venice.stats.metrics.AllMetricEntities; import com.linkedin.venice.stats.metrics.MetricEntities; import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.stats.metrics.MetricType; @@ -43,8 +42,8 @@ public class VeniceOpenTelemetryMetricsRepository { private static final Logger LOGGER = LogManager.getLogger(VeniceOpenTelemetryMetricsRepository.class); private SdkMeterProvider sdkMeterProvider = null; - private boolean emitOpenTelemetryMetrics; - private VeniceOpenTelemetryMetricNamingFormat metricFormat; + 
private final boolean emitOpenTelemetryMetrics; + private final VeniceOpenTelemetryMetricNamingFormat metricFormat; private Meter meter; private String metricPrefix; @@ -72,32 +71,26 @@ MetricExporter getOtlpHttpMetricExporter(VeniceMetricsConfig metricsConfig) { /** * Setting Exponential Histogram aggregation for {@link MetricType#HISTOGRAM} by looping through all - * the metric entities for this service and registering the views with exponential histogram aggregation for - * the Histogram type. + * the metric entities set for this service to registering the view with exponential histogram aggregation for + * all the {@link MetricType#HISTOGRAM} metrics. * - * {@link OtlpHttpMetricExporterBuilder#setDefaultAggregationSelector} to enable exponential histogram aggregation - * is not used here to set the aggregation: to not convert the histograms of type - * {@link MetricType#HISTOGRAM_WITHOUT_BUCKETS} to exponential histograms to follow explict boundaries. + * There is a limitation in opentelemetry sdk to configure different histogram aggregation for different + * instruments, so {@link OtlpHttpMetricExporterBuilder#setDefaultAggregationSelector} to enable exponential + * histogram aggregation is not used here to not convert the histograms of type {@link MetricType#MIN_MAX_COUNT_SUM_AGGREGATIONS} + * to exponential histograms to be able to follow explict boundaries. 
*/ private void setExponentialHistogramAggregation(SdkMeterProviderBuilder builder, VeniceMetricsConfig metricsConfig) { List metricNames = new ArrayList<>(); - // Loop through this module's metric entities and collect metric names - Class> moduleMetricEntityEnum = AllMetricEntities.getModuleMetricEntityEnum(getMetricPrefix()); - if (moduleMetricEntityEnum == null) { - LOGGER.warn("No metric entities found for module: {}", getMetricPrefix()); - return; + if (metricsConfig.getMetricEntities().isEmpty()) { + LOGGER + .warn("No metric entities found in config: {} to configure exponential histogram", metricsConfig.toString()); } - Enum[] constants = moduleMetricEntityEnum.getEnumConstants(); - if (constants != null) { - for (Enum constant: constants) { - if (constant instanceof MetricEntities) { - MetricEntities metricEntities = (MetricEntities) constant; - MetricEntity metricEntity = metricEntities.getMetricEntity(); - if (metricEntity.getMetricType() == MetricType.HISTOGRAM) { - metricNames.add(getFullMetricName(getMetricPrefix(), metricEntity.getMetricName())); - } - } + + for (MetricEntities metricEntities: metricsConfig.getMetricEntities()) { + MetricEntity metricEntity = metricEntities.getMetricEntity(); + if (metricEntity.getMetricType() == MetricType.HISTOGRAM) { + metricNames.add(getFullMetricName(getMetricPrefix(), metricEntity.getMetricName())); } } @@ -182,7 +175,8 @@ public DoubleHistogram createHistogram(MetricEntity metricEntity) { DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) .setUnit(metricEntity.getUnit().name()) .setDescription(metricEntity.getDescription()); - if (metricEntity.getMetricType() == MetricType.HISTOGRAM_WITHOUT_BUCKETS) { + if (metricEntity.getMetricType() == MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS) { + // No buckets needed to get only min/max/count/sum aggregations builder.setExplicitBucketBoundariesAdvice(new ArrayList<>()); } return builder.build(); @@ -205,18 +199,15 @@ public LongCounter 
createCounter(MetricEntity metricEntity) { public Object createInstrument(MetricEntity metricEntity) { switch (metricEntity.getMetricType()) { case HISTOGRAM: - case HISTOGRAM_WITHOUT_BUCKETS: - metricEntity.setOtelMetric(createHistogram(metricEntity)); - break; + case MIN_MAX_COUNT_SUM_AGGREGATIONS: + return createHistogram(metricEntity); case COUNTER: - metricEntity.setOtelMetric(createCounter(metricEntity)); - break; + return createCounter(metricEntity); default: throw new VeniceException("Unknown metric type: " + metricEntity.getMetricType()); } - return metricEntity.getOtelMetric(); } public void close() { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java deleted file mode 100644 index 016d2ef1eb..0000000000 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AllMetricEntities.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.linkedin.venice.stats.metrics; - -import com.linkedin.venice.stats.metrics.modules.RouterMetricEntities; -import java.util.HashMap; -import java.util.Map; - - -public class AllMetricEntities { - private static final Map>> allModuleMetricEntitiesEnums = new HashMap<>(); - - // Add all the components metric enum classes - static { - allModuleMetricEntitiesEnums.put("venice.router", RouterMetricEntities.class); - } - - // Method to retrieve an enum class by key - public static Class> getModuleMetricEntityEnum(String key) { - return allModuleMetricEntitiesEnums.get(key); - } -} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java index c00edc95ec..eb11579c68 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java +++ 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java @@ -82,7 +82,7 @@ public Set getDimensionsList() { */ public void createMetric(VeniceOpenTelemetryMetricsRepository otelRepository) { if (otelRepository != null) { - otelRepository.createInstrument(this); + setOtelMetric(otelRepository.createInstrument(this)); } } @@ -93,7 +93,7 @@ private void recordOtelMetric(double value, Attributes otelDimensions) { if (otelMetric != null) { switch (metricType) { case HISTOGRAM: - case HISTOGRAM_WITHOUT_BUCKETS: + case MIN_MAX_COUNT_SUM_AGGREGATIONS: ((DoubleHistogram) otelMetric).record(value, otelDimensions); break; case COUNTER: diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java index 077b518f33..58213c292a 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricType.java @@ -9,17 +9,20 @@ */ public enum MetricType { /** - * For Histogram with percentiles: can be configured to be exponential or explicit bucket + * Use Histogram to get percentiles/min/max/count/sum and other aggregates: can be configured to + * be exponential or explicit bucket
* check {@link VeniceMetricsConfig.Builder#extractAndSetOtelConfigs} for more details */ HISTOGRAM, + /** - * For Histogram without percentiles: Explicit bucket histogram. - * Provides multiple aggregations like min, max, count and sum without the memory overhead of percentiles. + * To get min/max/count/sum aggregation without the memory overhead to calculate percentiles, use + * Otel Explicit bucket Histogram but without buckets . * check {@link VeniceOpenTelemetryMetricsRepository#createHistogram} and * {@link VeniceOpenTelemetryMetricsRepository#setExponentialHistogramAggregation} for more details */ - HISTOGRAM_WITHOUT_BUCKETS, + MIN_MAX_COUNT_SUM_AGGREGATIONS, + /** * For Counter: A simple counter that can be added to. */ diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java index c7af203f26..4cfb4f6da9 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -6,6 +6,7 @@ import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_CUSTOM_DIMENSIONS_MAP; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_ENABLED; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_ENDPOINT; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_LOG; @@ -170,4 +171,31 @@ public void testSetHistogramAggregationSelectorInvalidConfig() { .extractAndSetOtelConfigs(otelConfigs) 
.build(); } + + @Test + public void testSetOtelCustomDimensionsMap() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "false"); + otelConfigs.put(OTEL_VENICE_CUSTOM_DIMENSIONS_MAP, "key1=value1,key2=value2"); + VeniceMetricsConfig config = new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); + assertEquals(config.getOtelCustomDimensionsMap().size(), 2); + assertEquals(config.getOtelCustomDimensionsMap().get("key1"), "value1"); + assertEquals(config.getOtelCustomDimensionsMap().get("key2"), "value2"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSetOtelCustomDimensionsMapWithInvalidValue() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "false"); + otelConfigs.put(OTEL_VENICE_CUSTOM_DIMENSIONS_MAP, "key1=value1,key2=value2=3"); + new Builder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .extractAndSetOtelConfigs(otelConfigs) + .build(); + } } diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index 92a891b72b..bec8d062f5 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -22,6 +22,7 @@ import static com.linkedin.venice.ConfigKeys.ZOOKEEPER_ADDRESS; import static com.linkedin.venice.VeniceConstants.DEFAULT_PER_ROUTER_READ_QUOTA; import static com.linkedin.venice.integration.utils.VeniceClusterWrapperConstants.ROUTER_PORT_TO_USE_IN_VENICE_ROUTER_WRAPPER; +import 
static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_METRIC_ENTITIES; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_BUCKETS; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION_MAX_SCALE; @@ -40,7 +41,7 @@ import com.linkedin.venice.router.RouterServer; import com.linkedin.venice.router.httpclient.StorageNodeClientType; import com.linkedin.venice.servicediscovery.ServiceDiscoveryAnnouncer; -import com.linkedin.venice.stats.TehutiUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MetricsAware; import com.linkedin.venice.utils.PropertyBuilder; import com.linkedin.venice.utils.SslUtils; @@ -194,9 +195,10 @@ static StatefulServiceProvider generateService( d2Servers, Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), - TehutiUtils.getVeniceMetricsRepository( + VeniceMetricsRepository.getVeniceMetricsRepository( ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, + ROUTER_SERVICE_METRIC_ENTITIES, routerProperties.getAsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); @@ -259,8 +261,11 @@ protected void newProcess() { d2Servers, Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), - TehutiUtils - .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getAsMap()), + VeniceMetricsRepository.getVeniceMetricsRepository( + ROUTER_SERVICE_NAME, + ROUTER_SERVICE_METRIC_PREFIX, + ROUTER_SERVICE_METRIC_ENTITIES, + properties.getAsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); LOGGER.info("Started VeniceRouterWrapper: {}", this); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java 
b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index b70823afc4..6507642582 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -68,6 +68,7 @@ import com.linkedin.venice.router.stats.LongTailRetryStatsProvider; import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterHttpRequestStats; +import com.linkedin.venice.router.stats.RouterMetricEntities; import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.stats.RouterThrottleStats; import com.linkedin.venice.router.stats.SecurityStats; @@ -79,12 +80,11 @@ import com.linkedin.venice.security.SSLFactory; import com.linkedin.venice.service.AbstractVeniceService; import com.linkedin.venice.servicediscovery.ServiceDiscoveryAnnouncer; -import com.linkedin.venice.stats.TehutiUtils; import com.linkedin.venice.stats.ThreadPoolStats; import com.linkedin.venice.stats.VeniceJVMStats; -import com.linkedin.venice.stats.VeniceMetricsConfig; import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.stats.ZkClientStatusStats; +import com.linkedin.venice.stats.metrics.MetricEntities; import com.linkedin.venice.throttle.EventThrottler; import com.linkedin.venice.utils.DaemonThreadFactory; import com.linkedin.venice.utils.HelixUtils; @@ -109,6 +109,8 @@ import java.net.SocketAddress; import java.time.Duration; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; @@ -197,7 +199,8 @@ public class RouterServer extends AbstractVeniceService { public static final String ROUTER_SERVICE_NAME = "venice-router"; public static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; - + public static final Collection ROUTER_SERVICE_METRIC_ENTITIES = + 
Arrays.asList(RouterMetricEntities.values()); /** * Thread number used to monitor the listening port; */ @@ -275,8 +278,11 @@ public RouterServer( serviceDiscoveryAnnouncers, accessController, sslFactory, - TehutiUtils - .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getAsMap()), + VeniceMetricsRepository.getVeniceMetricsRepository( + ROUTER_SERVICE_NAME, + ROUTER_SERVICE_METRIC_PREFIX, + ROUTER_SERVICE_METRIC_ENTITIES, + properties.getAsMap()), null, "venice-discovery"); } @@ -419,10 +425,11 @@ public RouterServer( serviceDiscoveryAnnouncers, Optional.empty(), sslFactory, - new VeniceMetricsRepository( - new VeniceMetricsConfig.Builder().setServiceName(ROUTER_SERVICE_NAME) - .extractAndSetOtelConfigs(properties.getAsMap()) - .build()), + VeniceMetricsRepository.getVeniceMetricsRepository( + ROUTER_SERVICE_NAME, + ROUTER_SERVICE_METRIC_PREFIX, + ROUTER_SERVICE_METRIC_ENTITIES, + properties.getAsMap()), false); this.routingDataRepository = routingDataRepository; this.hybridStoreQuotaRepository = hybridStoreQuotaRepository; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 6079a7b3e8..0038853587 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -2,6 +2,15 @@ import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_METRIC_PREFIX; import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_NAME; +import static com.linkedin.venice.router.stats.RouterMetricEntities.ABORTED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.ALLOWED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_COUNT; +import static 
com.linkedin.venice.router.stats.RouterMetricEntities.CALL_KEY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_TIME; +import static com.linkedin.venice.router.stats.RouterMetricEntities.DISALLOWED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.INCOMING_CALL_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_DELAY; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; @@ -13,15 +22,6 @@ import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.ABORTED_RETRY_COUNT; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.ALLOWED_RETRY_COUNT; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.CALL_COUNT; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.CALL_KEY_COUNT; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.CALL_TIME; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.DISALLOWED_RETRY_COUNT; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.INCOMING_CALL_COUNT; -import static com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.RETRY_COUNT; -import static 
com.linkedin.venice.stats.metrics.modules.RouterMetricEntities.RETRY_DELAY; import com.linkedin.alpini.router.monitoring.ScatterGatherStats; import com.linkedin.venice.common.VeniceSystemStoreUtils; @@ -40,6 +40,7 @@ import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; import io.netty.handler.codec.http.HttpResponseStatus; import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.common.AttributesBuilder; import io.tehuti.Metric; import io.tehuti.metrics.MeasurableStat; import io.tehuti.metrics.MetricConfig; @@ -53,6 +54,7 @@ import io.tehuti.metrics.stats.OccurrenceRate; import io.tehuti.metrics.stats.Rate; import io.tehuti.metrics.stats.Total; +import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -155,11 +157,17 @@ public RouterHttpRequestStats( ? veniceMetricsRepository.getVeniceMetricsConfig().getMetricNamingFormat() : VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; otelRepository = veniceMetricsRepository.getOpenTelemetryMetricsRepository(); - commonMetricDimensions = Attributes.builder() + AttributesBuilder attributesBuilder = Attributes.builder() .put(getDimensionName(VENICE_STORE_NAME), storeName) .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.name().toLowerCase()) - .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName) - .build(); + .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName); + // add custom dimensions passed in by the user + for (Map.Entry entry: veniceMetricsRepository.getVeniceMetricsConfig() + .getOtelCustomDimensionsMap() + .entrySet()) { + attributesBuilder.put(entry.getKey(), entry.getValue()); + } + commonMetricDimensions = attributesBuilder.build(); } else { emitOpenTelemetryMetrics = false; openTelemetryMetricFormat = VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java 
b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java similarity index 88% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java rename to services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java index 7b82a2505a..e58dbd6169 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/modules/RouterMetricEntities.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java @@ -1,4 +1,4 @@ -package com.linkedin.venice.stats.metrics.modules; +package com.linkedin.venice.router.stats; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; @@ -9,15 +9,13 @@ import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.utils.Utils.setOf; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; import com.linkedin.venice.stats.metrics.MetricEntities; import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.stats.metrics.MetricType; import com.linkedin.venice.stats.metrics.MetricUnit; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; import java.util.Set; @@ -49,7 +47,8 @@ public enum RouterMetricEntities implements MetricEntities { VENICE_RESPONSE_STATUS_CODE_CATEGORY) ), CALL_KEY_COUNT( - "call_key_count", MetricType.HISTOGRAM_WITHOUT_BUCKETS, MetricUnit.NUMBER, "Count of keys in multi key requests", + "call_key_count", 
MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.NUMBER, + "Count of keys in multi key requests", setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD, VENICE_REQUEST_VALIDATION_OUTCOME) ), RETRY_COUNT( @@ -65,7 +64,7 @@ public enum RouterMetricEntities implements MetricEntities { setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) ), RETRY_DELAY( - "retry_delay", MetricType.HISTOGRAM_WITHOUT_BUCKETS, MetricUnit.MILLISECONDS, "Retry delay time", + "retry_delay", MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.MILLISECONDS, "Retry delay time", setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) ), ABORTED_RETRY_COUNT( @@ -88,9 +87,4 @@ public enum RouterMetricEntities implements MetricEntities { public MetricEntity getMetricEntity() { return metricEntity; } - - @SafeVarargs - public static Set setOf(T... objs) { - return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(objs))); - } } From 39f5388f314f024197fd480f546b9b3f76b072c7 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Fri, 22 Nov 2024 18:05:21 -0800 Subject: [PATCH 10/19] address review comments --- .../venice/stats/AbstractVeniceAggStats.java | 2 +- .../venice/stats/VeniceMetricsConfig.java | 17 +- .../venice/stats/VeniceMetricsRepository.java | 4 +- .../VeniceOpenTelemetryMetricsRepository.java | 9 +- ...va => HttpResponseStatusCodeCategory.java} | 2 +- .../dimensions/RequestRetryAbortReason.java | 16 + ...stRetryType.java => RequestRetryType.java} | 4 +- ...ome.java => RequestValidationOutcome.java} | 4 +- .../dimensions/VeniceMetricsDimensions.java | 8 +- .../VeniceRequestRetryAbortReason.java | 17 - .../VeniceResponseStatusCategory.java | 7 + .../venice/stats/metrics/MetricEntities.java | 9 - .../venice/stats/metrics/MetricEntity.java | 50 --- .../stats/metrics/MetricEntityState.java | 135 ++++++ .../venice/stats/metrics/MetricUnit.java | 2 +- ...> HttpResponseStatusCodeCategoryTest.java} | 4 +- ....java => 
RequestRetryAbortReasonTest.java} | 12 +- ...ypeTest.java => RequestRetryTypeTest.java} | 4 +- ...java => RequestValidationOutcomeTest.java} | 4 +- .../linkedin/venice/endToEnd/TestBatch.java | 1 - .../linkedin/venice/router/RouterServer.java | 9 +- .../router/stats/RouterHttpRequestStats.java | 386 +++++++++++------- ...cEntities.java => RouterMetricEntity.java} | 10 +- 23 files changed, 451 insertions(+), 265 deletions(-) rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/{VeniceHttpResponseStatusCodeCategory.java => HttpResponseStatusCodeCategory.java} (94%) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/{VeniceRequestRetryType.java => RequestRetryType.java} (75%) rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/{VeniceRequestValidationOutcome.java => RequestValidationOutcome.java} (73%) delete mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java delete mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java rename internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/{VeniceHttpResponseStatusCodeCategoryTest.java => HttpResponseStatusCodeCategoryTest.java} (84%) rename internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/{VeniceRequestRetryAbortReasonTest.java => RequestRetryAbortReasonTest.java} (66%) rename internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/{VeniceRequestRetryTypeTest.java => RequestRetryTypeTest.java} (82%) rename 
internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/{VeniceRequestValidationOutcomeTest.java => RequestValidationOutcomeTest.java} (80%) rename services/venice-router/src/main/java/com/linkedin/venice/router/stats/{RouterMetricEntities.java => RouterMetricEntity.java} (92%) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java index dcf7dd0dbc..b6d852f6b8 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java @@ -13,7 +13,7 @@ public abstract class AbstractVeniceAggStats { private StatsSupplier statsFactory; private final MetricsRepository metricsRepository; - private String clusterName = null; + private final String clusterName; private AbstractVeniceAggStats( String clusterName, diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index 56428ec1e2..92ae668355 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -1,6 +1,6 @@ package com.linkedin.venice.stats; -import com.linkedin.venice.stats.metrics.MetricEntities; +import com.linkedin.venice.stats.metrics.MetricEntity; import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; import io.opentelemetry.sdk.metrics.export.MetricExporter; @@ -108,7 +108,7 @@ public class VeniceMetricsConfig { private final String serviceName; private final String metricPrefix; - private final Collection 
metricEntities; + private final Collection metricEntities; /** reusing tehuti's MetricConfig */ private final MetricConfig tehutiMetricConfig; @@ -174,7 +174,7 @@ private VeniceMetricsConfig(Builder builder) { public static class Builder { private String serviceName = "default_service"; private String metricPrefix = null; - private Collection metricEntities; + private Collection metricEntities; private boolean emitOtelMetrics = false; private boolean exportOtelMetricsToEndpoint = false; private Map otelCustomDimensionsMap = new HashMap<>(); @@ -200,7 +200,7 @@ public Builder setMetricPrefix(String metricPrefix) { return this; } - public Builder setMetricEntities(Collection metricEntities) { + public Builder setMetricEntities(Collection metricEntities) { this.metricEntities = metricEntities; return this; } @@ -402,7 +402,7 @@ public String getMetricPrefix() { return this.metricPrefix; } - public Collection getMetricEntities() { + public Collection getMetricEntities() { return this.metricEntities; } @@ -461,9 +461,10 @@ public MetricConfig getTehutiMetricConfig() { @Override public String toString() { return "VeniceMetricsConfig{" + "serviceName='" + serviceName + '\'' + ", metricPrefix='" + metricPrefix + '\'' - + ", emitOTelMetrics=" + emitOTelMetrics + ", exportOtelMetricsToEndpoint=" + exportOtelMetricsToEndpoint - + ", otelExportProtocol='" + otelExportProtocol + '\'' + ", otelEndpoint='" + otelEndpoint + '\'' - + ", otelHeaders=" + otelHeaders + ", exportOtelMetricsToLog=" + exportOtelMetricsToLog + + ", metricEntities=" + metricEntities + ", emitOTelMetrics=" + emitOTelMetrics + + ", exportOtelMetricsToEndpoint=" + exportOtelMetricsToEndpoint + ", otelCustomDimensionsMap=" + + otelCustomDimensionsMap + ", otelExportProtocol='" + otelExportProtocol + '\'' + ", otelEndpoint='" + + otelEndpoint + '\'' + ", otelHeaders=" + otelHeaders + ", exportOtelMetricsToLog=" + exportOtelMetricsToLog + ", metricNamingFormat=" + metricNamingFormat + ", 
otelAggregationTemporalitySelector=" + otelAggregationTemporalitySelector + ", useOtelExponentialHistogram=" + useOtelExponentialHistogram + ", otelExponentialHistogramMaxScale=" + otelExponentialHistogramMaxScale diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index bfd791f390..aefdba6dec 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -1,6 +1,6 @@ package com.linkedin.venice.stats; -import com.linkedin.venice.stats.metrics.MetricEntities; +import com.linkedin.venice.stats.metrics.MetricEntity; import io.tehuti.metrics.JmxReporter; import io.tehuti.metrics.MetricsRepository; import java.io.Closeable; @@ -53,7 +53,7 @@ public void close() { public static VeniceMetricsRepository getVeniceMetricsRepository( String serviceName, String metricPrefix, - Collection metricEntities, + Collection metricEntities, Map configs) { VeniceMetricsRepository metricsRepository = new VeniceMetricsRepository( new VeniceMetricsConfig.Builder().setServiceName(serviceName) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 5f69971655..753bc07e94 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -4,7 +4,6 @@ import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import com.linkedin.venice.exceptions.VeniceException; -import 
com.linkedin.venice.stats.metrics.MetricEntities; import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.stats.metrics.MetricType; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; @@ -87,8 +86,7 @@ private void setExponentialHistogramAggregation(SdkMeterProviderBuilder builder, .warn("No metric entities found in config: {} to configure exponential histogram", metricsConfig.toString()); } - for (MetricEntities metricEntities: metricsConfig.getMetricEntities()) { - MetricEntity metricEntity = metricEntities.getMetricEntity(); + for (MetricEntity metricEntity: metricsConfig.getMetricEntities()) { if (metricEntity.getMetricType() == MetricType.HISTOGRAM) { metricNames.add(getFullMetricName(getMetricPrefix(), metricEntity.getMetricName())); } @@ -211,7 +209,6 @@ public Object createInstrument(MetricEntity metricEntity) { } public void close() { - LOGGER.info("OpenTelemetry close"); if (sdkMeterProvider != null) { sdkMeterProvider.shutdown(); sdkMeterProvider = null; @@ -248,11 +245,11 @@ public CompletableResultCode shutdown() { } // for testing purpose - public SdkMeterProvider getSdkMeterProvider() { + SdkMeterProvider getSdkMeterProvider() { return sdkMeterProvider; } - public Meter getMeter() { + Meter getMeter() { return meter; } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java similarity index 94% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java index add7f23427..4f24644ec7 100644 --- 
a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java @@ -8,7 +8,7 @@ * Maps the provided HTTP response status {@link HttpResponseStatus} to one of * 1xx, 2xx, 3xx, 4xx, 5xx categories. */ -public class VeniceHttpResponseStatusCodeCategory { +public class HttpResponseStatusCodeCategory { private static final String UNKNOWN_CATEGORY = "unknown"; public static String getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus statusCode) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java new file mode 100644 index 0000000000..0ec21db216 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java @@ -0,0 +1,16 @@ +package com.linkedin.venice.stats.dimensions; + +public enum RequestRetryAbortReason { + SLOW_ROUTE("slow_route"), DELAY_CONSTRAINT("delay_constraint"), MAX_RETRY_ROUTE_LIMIT("max_retry_router_limit"), + NO_AVAILABLE_REPLICA("no_available_replica"); + + private final String abortReason; + + RequestRetryAbortReason(String abortReason) { + this.abortReason = abortReason; + } + + public String getAbortReason() { + return this.abortReason; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java similarity index 75% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java 
index 7faa49c5a4..4aa022c7a0 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java @@ -1,11 +1,11 @@ package com.linkedin.venice.stats.dimensions; -public enum VeniceRequestRetryType { +public enum RequestRetryType { ERROR_RETRY("error_retry"), LONG_TAIL_RETRY("long_tail_retry"); private final String retryType; - VeniceRequestRetryType(String retryType) { + RequestRetryType(String retryType) { this.retryType = retryType; } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java similarity index 73% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java index 8b961e4592..f549403704 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java @@ -1,11 +1,11 @@ package com.linkedin.venice.stats.dimensions; -public enum VeniceRequestValidationOutcome { +public enum RequestValidationOutcome { VALID("valid"), INVALID_KEY_COUNT_LIMIT_EXCEEDED("invalid_key_count_limit_exceeded"); private final String outcome; - VeniceRequestValidationOutcome(String outcome) { + RequestValidationOutcome(String outcome) { this.outcome = outcome; } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java index f588f61977..54737cc534 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java @@ -18,19 +18,19 @@ public enum VeniceMetricsDimensions { /** {@link io.netty.handler.codec.http.HttpResponseStatus} ie. 200, 400, etc */ HTTP_RESPONSE_STATUS_CODE("http.response.status_code"), - /** {@link VeniceHttpResponseStatusCodeCategory} ie. 1xx, 2xx, etc */ + /** {@link HttpResponseStatusCodeCategory} ie. 1xx, 2xx, etc */ HTTP_RESPONSE_STATUS_CODE_CATEGORY("http.response.status_code_category"), - /** {@link VeniceRequestValidationOutcome#outcome} */ + /** {@link RequestValidationOutcome#outcome} */ VENICE_REQUEST_VALIDATION_OUTCOME("venice.request.validation_outcome"), /** {@link VeniceResponseStatusCategory} */ VENICE_RESPONSE_STATUS_CODE_CATEGORY("venice.response.status_code_category"), - /** {@link VeniceRequestRetryType} */ + /** {@link RequestRetryType} */ VENICE_REQUEST_RETRY_TYPE("venice.request.retry_type"), - /** {@link VeniceRequestRetryAbortReason} */ + /** {@link RequestRetryAbortReason} */ VENICE_REQUEST_RETRY_ABORT_REASON("venice.request.retry_abort_reason"); private final String[] dimensionName = new String[VeniceOpenTelemetryMetricNamingFormat.SIZE]; diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java deleted file mode 100644 index adc143042d..0000000000 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.linkedin.venice.stats.dimensions; - -public enum 
VeniceRequestRetryAbortReason { - RETRY_ABORTED_BY_SLOW_ROUTE("slow_route"), RETRY_ABORTED_BY_DELAY_CONSTRAINT("delay_constraint"), - RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT("max_retry_router_limit"), - RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA("no_available_replica"); - - private final String abortReason; - - VeniceRequestRetryAbortReason(String abortReason) { - this.abortReason = abortReason; - } - - public String getAbortReason() { - return this.abortReason; - } -} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java index 80372c0e2d..887ad509bb 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java @@ -1,5 +1,12 @@ package com.linkedin.venice.stats.dimensions; +/** + * How Venice categorizes the response status of a request: + * We are emitting both {@link HttpResponseStatusCodeCategory} and this enum to capture the HTTP standard as + * well as the Venice specific categorization. For instance, Venice considers key not found as a healthy + * response, but the HTTP standard would consider it a 404 (4xx) which leads to checking for both 200 and 404 + * to account for all healthy requests. This dimension makes it easier to make Venice specific aggregations. 
+ */ public enum VeniceResponseStatusCategory { HEALTHY("healthy"), UNHEALTHY("unhealthy"), TARDY("tardy"), THROTTLED("throttled"), BAD_REQUEST("bad_request"); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java deleted file mode 100644 index edaeb5ca55..0000000000 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntities.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.linkedin.venice.stats.metrics; - -/** - * Interface to get {@link MetricEntity} - * All modules metric enum class should implement this interface. - */ -public interface MetricEntities { - MetricEntity getMetricEntity(); -} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java index eb11579c68..4419d41c7f 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntity.java @@ -1,10 +1,6 @@ package com.linkedin.venice.stats.metrics; -import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; -import io.opentelemetry.api.common.Attributes; -import io.opentelemetry.api.metrics.DoubleHistogram; -import io.opentelemetry.api.metrics.LongCounter; import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -20,7 +16,6 @@ public class MetricEntity { private final MetricUnit unit; private final String description; private final Set dimensionsList; - private Object otelMetric = null; public MetricEntity( @Nonnull String metricName, @@ -44,14 +39,6 @@ public MetricEntity( this.dimensionsList = dimensionsList; } - 
public void setOtelMetric(Object otelMetric) { - this.otelMetric = otelMetric; - } - - public Object getOtelMetric() { - return otelMetric; - } - @Nonnull public String getMetricName() { return metricName; @@ -76,41 +63,4 @@ public String getDescription() { public Set getDimensionsList() { return dimensionsList; } - - /** - * create the metric - */ - public void createMetric(VeniceOpenTelemetryMetricsRepository otelRepository) { - if (otelRepository != null) { - setOtelMetric(otelRepository.createInstrument(this)); - } - } - - /** - * Record otel metrics - */ - private void recordOtelMetric(double value, Attributes otelDimensions) { - if (otelMetric != null) { - switch (metricType) { - case HISTOGRAM: - case MIN_MAX_COUNT_SUM_AGGREGATIONS: - ((DoubleHistogram) otelMetric).record(value, otelDimensions); - break; - case COUNTER: - ((LongCounter) otelMetric).add((long) value, otelDimensions); - break; - - default: - throw new IllegalArgumentException("Unsupported metric type: " + metricType); - } - } - } - - public void record(long value, Attributes otelDimensions) { - recordOtelMetric(value, otelDimensions); - } - - public void record(double value, Attributes otelDimensions) { - recordOtelMetric(value, otelDimensions); - } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java new file mode 100644 index 0000000000..959fa3e405 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java @@ -0,0 +1,135 @@ +package com.linkedin.venice.stats.metrics; + +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; +import io.tehuti.metrics.MeasurableStat; +import io.tehuti.metrics.Sensor; 
+import io.tehuti.metrics.stats.Percentiles; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * Holds {@link MetricEntity} and the operation state for the metric + */ +public class MetricEntityState { + private MetricEntity metricEntity; + // otel metric + private Object otelMetric = null; + // Map of tehuti names and sensors: 1 otel metric can cover multiple tehuti sensors + private Map tehutiSensors = null; + + public MetricEntityState(MetricEntity metricEntity, VeniceOpenTelemetryMetricsRepository otelRepository) { + this.metricEntity = metricEntity; + setOtelMetric(otelRepository.createInstrument(this.metricEntity)); + } + + public MetricEntityState( + MetricEntity metricEntity, + VeniceOpenTelemetryMetricsRepository otelRepository, + TehutiSensorRegistrationFunction registerTehutiSensor, + Map> tehutiMetricInput) { + this.metricEntity = metricEntity; + createMetric(otelRepository, tehutiMetricInput, registerTehutiSensor); + } + + // setters + public void setMetricEntity(MetricEntity metricEntity) { + this.metricEntity = metricEntity; + } + + public void setOtelMetric(Object otelMetric) { + this.otelMetric = otelMetric; + } + + public void setTehutiSensors(Map tehutiSensors) { + this.tehutiSensors = tehutiSensors; + } + + /** + * Add tehuti sensor to tehutiSensors map and throw exception if sensor with same name already exists + */ + public void addTehutiSensors(String name, Sensor tehutiSensor) { + if (tehutiSensors == null) { + tehutiSensors = new HashMap<>(); + } + if (tehutiSensors.put(name, tehutiSensor) != null) { + throw new IllegalArgumentException("Sensor with name '" + name + "' already exists."); + } + } + + /** + * create the metrics/Sensors + */ + @FunctionalInterface + public interface TehutiSensorRegistrationFunction { + Sensor register(String sensorName, MeasurableStat... 
stats); + } + + public void createMetric( + VeniceOpenTelemetryMetricsRepository otelRepository, + Map> tehutiMetricInput, + TehutiSensorRegistrationFunction registerTehutiSensor) { + // Otel metric + if (otelRepository != null) { + setOtelMetric(otelRepository.createInstrument(this.metricEntity)); + } + // tehuti metric + // loop through tehutiMetricInput and call registerTehutiSensor for each String, List pair + for (Map.Entry> entry: tehutiMetricInput.entrySet()) { + if (entry.getValue().contains(Percentiles.class)) { + addTehutiSensors( + entry.getKey(), + registerTehutiSensor.register(entry.getKey(), entry.getValue().toArray(new MeasurableStat[0]))); + + } else { + addTehutiSensors( + entry.getKey(), + registerTehutiSensor.register(entry.getKey(), entry.getValue().toArray(new MeasurableStat[0]))); + } + } + } + + /** + * Record otel metrics + */ + private void recordOtelMetric(double value, Attributes otelDimensions) { + if (otelMetric != null) { + MetricType metricType = this.metricEntity.getMetricType(); + switch (metricType) { + case HISTOGRAM: + case MIN_MAX_COUNT_SUM_AGGREGATIONS: + ((DoubleHistogram) otelMetric).record(value, otelDimensions); + break; + case COUNTER: + ((LongCounter) otelMetric).add((long) value, otelDimensions); + break; + + default: + throw new IllegalArgumentException("Unsupported metric type: " + metricType); + } + } + } + + private void recordTehutiMetric(String tehutiMetricName, double value) { + if (tehutiSensors != null) { + Sensor sensor = tehutiSensors.get(tehutiMetricName); + if (sensor != null) { + sensor.record(value); + } + } + } + + public void record(String tehutiMetricName, long value, Attributes otelDimensions) { + recordOtelMetric(value, otelDimensions); + recordTehutiMetric(tehutiMetricName, value); + } + + public void record(String tehutiMetricName, double value, Attributes otelDimensions) { + recordOtelMetric(value, otelDimensions); + recordTehutiMetric(tehutiMetricName, value); + } +} diff --git 
a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java index 3cb823d623..a90ef5bd0f 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricUnit.java @@ -4,5 +4,5 @@ * Metric Unit enum to define list of Units supported for metrics */ public enum MetricUnit { - NUMBER, MILLISECONDS + NUMBER, MILLISECOND } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategoryTest.java similarity index 84% rename from internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategoryTest.java index 6cc28e4ebe..9a60bcfacd 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategoryTest.java @@ -1,13 +1,13 @@ package com.linkedin.venice.stats.dimensions; -import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; +import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static org.testng.Assert.assertEquals; import io.netty.handler.codec.http.HttpResponseStatus; import org.testng.annotations.Test; -public class VeniceHttpResponseStatusCodeCategoryTest { +public class HttpResponseStatusCodeCategoryTest { 
@Test() public void testValues() { assertEquals(getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus.PROCESSING), "1xx"); diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java similarity index 66% rename from internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java index 6476b3e1c2..84c86d0de5 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java @@ -5,21 +5,21 @@ import org.testng.annotations.Test; -public class VeniceRequestRetryAbortReasonTest { +public class RequestRetryAbortReasonTest { @Test public void testRetryRequestAbortReason() { - for (VeniceRequestRetryAbortReason reason: VeniceRequestRetryAbortReason.values()) { + for (RequestRetryAbortReason reason: RequestRetryAbortReason.values()) { switch (reason) { - case RETRY_ABORTED_BY_SLOW_ROUTE: + case SLOW_ROUTE: assertEquals(reason.getAbortReason(), "slow_route"); break; - case RETRY_ABORTED_BY_DELAY_CONSTRAINT: + case DELAY_CONSTRAINT: assertEquals(reason.getAbortReason(), "delay_constraint"); break; - case RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT: + case MAX_RETRY_ROUTE_LIMIT: assertEquals(reason.getAbortReason(), "max_retry_router_limit"); break; - case RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA: + case NO_AVAILABLE_REPLICA: assertEquals(reason.getAbortReason(), "no_available_replica"); break; default: diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java 
b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryTypeTest.java similarity index 82% rename from internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryTypeTest.java index 52c8bfb94c..2141a93a88 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryTypeTest.java @@ -5,10 +5,10 @@ import org.testng.annotations.Test; -public class VeniceRequestRetryTypeTest { +public class RequestRetryTypeTest { @Test public void testVeniceRequestRetryType() { - for (VeniceRequestRetryType retryType: VeniceRequestRetryType.values()) { + for (RequestRetryType retryType: RequestRetryType.values()) { switch (retryType) { case ERROR_RETRY: assertEquals(retryType.getRetryType(), "error_retry"); diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcomeTest.java similarity index 80% rename from internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java rename to internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcomeTest.java index 28f804eab5..f144850be7 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcomeTest.java @@ -5,10 +5,10 @@ import org.testng.annotations.Test; -public class VeniceRequestValidationOutcomeTest { +public class 
RequestValidationOutcomeTest { @Test public void testVeniceRequestValidationOutcome() { - for (VeniceRequestValidationOutcome outcome: VeniceRequestValidationOutcome.values()) { + for (RequestValidationOutcome outcome: RequestValidationOutcome.values()) { switch (outcome) { case VALID: assertEquals(outcome.getOutcome(), "valid"); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java index a561cb58d9..d95accd2d2 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java @@ -274,7 +274,6 @@ public void testCompressingRecord(boolean compressionMetricCollectionEnabled, bo validator, new UpdateStoreQueryParams().setCompressionStrategy(CompressionStrategy.GZIP)); - Thread.sleep(150000); // Re-push with Kafka Input testRepush(storeName, validator); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index 6507642582..e2db771326 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -68,7 +68,7 @@ import com.linkedin.venice.router.stats.LongTailRetryStatsProvider; import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterHttpRequestStats; -import com.linkedin.venice.router.stats.RouterMetricEntities; +import com.linkedin.venice.router.stats.RouterMetricEntity; import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.stats.RouterThrottleStats; import com.linkedin.venice.router.stats.SecurityStats; @@ -84,7 +84,7 @@ import 
com.linkedin.venice.stats.VeniceJVMStats; import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.stats.ZkClientStatusStats; -import com.linkedin.venice.stats.metrics.MetricEntities; +import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.throttle.EventThrottler; import com.linkedin.venice.utils.DaemonThreadFactory; import com.linkedin.venice.utils.HelixUtils; @@ -125,6 +125,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.function.LongSupplier; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import org.apache.helix.InstanceType; import org.apache.helix.manager.zk.ZKHelixManager; @@ -199,8 +200,8 @@ public class RouterServer extends AbstractVeniceService { public static final String ROUTER_SERVICE_NAME = "venice-router"; public static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; - public static final Collection ROUTER_SERVICE_METRIC_ENTITIES = - Arrays.asList(RouterMetricEntities.values()); + public static final Collection ROUTER_SERVICE_METRIC_ENTITIES = Collections.unmodifiableList( + Arrays.stream(RouterMetricEntity.values()).map(RouterMetricEntity::getMetricEntity).collect(Collectors.toList())); /** * Thread number used to monitor the listening port; */ diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 0038853587..2c8c04b5d2 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -2,17 +2,17 @@ import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_METRIC_PREFIX; import static com.linkedin.venice.router.RouterServer.ROUTER_SERVICE_NAME; -import static 
com.linkedin.venice.router.stats.RouterMetricEntities.ABORTED_RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.ALLOWED_RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_KEY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.CALL_TIME; -import static com.linkedin.venice.router.stats.RouterMetricEntities.DISALLOWED_RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.INCOMING_CALL_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_COUNT; -import static com.linkedin.venice.router.stats.RouterMetricEntities.RETRY_DELAY; +import static com.linkedin.venice.router.stats.RouterMetricEntity.ABORTED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.ALLOWED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_KEY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.CALL_TIME; +import static com.linkedin.venice.router.stats.RouterMetricEntity.DISALLOWED_RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.INCOMING_CALL_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.RETRY_COUNT; +import static com.linkedin.venice.router.stats.RouterMetricEntity.RETRY_DELAY; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; -import static com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; +import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; import static 
com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; @@ -33,11 +33,12 @@ import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat; import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; +import com.linkedin.venice.stats.dimensions.RequestRetryAbortReason; +import com.linkedin.venice.stats.dimensions.RequestRetryType; +import com.linkedin.venice.stats.dimensions.RequestValidationOutcome; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; -import com.linkedin.venice.stats.dimensions.VeniceRequestRetryAbortReason; -import com.linkedin.venice.stats.dimensions.VeniceRequestRetryType; -import com.linkedin.venice.stats.dimensions.VeniceRequestValidationOutcome; import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; +import com.linkedin.venice.stats.metrics.MetricEntityState; import io.netty.handler.codec.http.HttpResponseStatus; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.common.AttributesBuilder; @@ -54,6 +55,10 @@ import io.tehuti.metrics.stats.OccurrenceRate; import io.tehuti.metrics.stats.Rate; import io.tehuti.metrics.stats.Total; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -73,39 +78,28 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { } /** metrics to track incoming requests */ - private final Sensor incomingRequestSensor; + private final MetricEntityState incomingRequestMetric; /** metrics to track response handling */ - private final Sensor healthySensor; - private final Sensor unhealthySensor; - private final Sensor tardySensor; + private final MetricEntityState 
requestMetric; private final Sensor healthyRequestRateSensor; private final Sensor tardyRequestRatioSensor; - private final Sensor throttleSensor; - private final Sensor badRequestSensor; /** latency metrics */ - private final Sensor latencySensor; - private final Sensor healthyRequestLatencySensor; - private final Sensor unhealthyRequestLatencySensor; - private final Sensor tardyRequestLatencySensor; - private final Sensor throttledRequestLatencySensor; + private final Sensor latencyTehutiSensor; // This can be removed while removing tehuti + private final MetricEntityState latencyMetric; /** retry metrics */ - private final Sensor errorRetryCountSensor; - private final Sensor allowedRetryRequestSensor; - private final Sensor disallowedRetryRequestSensor; - private final Sensor retryDelaySensor; + private final MetricEntityState retryCountMetric; + private final MetricEntityState allowedRetryCountMetric; + private final MetricEntityState disallowedRetryCountMetric; + private final MetricEntityState retryDelayMetric; /** retry aborted metrics */ - private final Sensor delayConstraintAbortedRetryRequest; - private final Sensor slowRouteAbortedRetryRequest; - private final Sensor retryRouteLimitAbortedRetryRequest; - private final Sensor noAvailableReplicaAbortedRetryRequest; + private final MetricEntityState abortedRetryCountMetric; /** key count metrics */ - private final Sensor keyNumSensor; - private final Sensor badRequestKeyCountSensor; + private final MetricEntityState keyCountMetric; /** OTel metrics yet to be added */ private final Sensor requestSizeSensor; @@ -150,21 +144,16 @@ public RouterHttpRequestStats( VeniceOpenTelemetryMetricsRepository otelRepository; if (metricsRepository instanceof VeniceMetricsRepository) { VeniceMetricsRepository veniceMetricsRepository = (VeniceMetricsRepository) metricsRepository; - emitOpenTelemetryMetrics = (veniceMetricsRepository != null) - ? 
veniceMetricsRepository.getVeniceMetricsConfig().emitOtelMetrics() - : false; - openTelemetryMetricFormat = (veniceMetricsRepository != null) - ? veniceMetricsRepository.getVeniceMetricsConfig().getMetricNamingFormat() - : VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; + VeniceMetricsConfig veniceMetricsConfig = veniceMetricsRepository.getVeniceMetricsConfig(); + emitOpenTelemetryMetrics = veniceMetricsConfig.emitOtelMetrics(); + openTelemetryMetricFormat = veniceMetricsConfig.getMetricNamingFormat(); otelRepository = veniceMetricsRepository.getOpenTelemetryMetricsRepository(); AttributesBuilder attributesBuilder = Attributes.builder() .put(getDimensionName(VENICE_STORE_NAME), storeName) .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.name().toLowerCase()) .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName); // add custom dimensions passed in by the user - for (Map.Entry entry: veniceMetricsRepository.getVeniceMetricsConfig() - .getOtelCustomDimensionsMap() - .entrySet()) { + for (Map.Entry entry: veniceMetricsConfig.getOtelCustomDimensionsMap().entrySet()) { attributesBuilder.put(entry.getKey(), entry.getValue()); } commonMetricDimensions = attributesBuilder.build(); @@ -180,49 +169,126 @@ public RouterHttpRequestStats( Rate healthyRequestRate = new OccurrenceRate(); Rate tardyRequestRate = new OccurrenceRate(); - incomingRequestSensor = registerSensor("request", new Count(), requestRate); - INCOMING_CALL_COUNT.getMetricEntity().createMetric(otelRepository); + incomingRequestMetric = new MetricEntityState( + INCOMING_CALL_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.INCOMING_REQUEST, Arrays.asList(new Count(), requestRate)); + } + }); - healthySensor = registerSensor("healthy_request", new Count(), healthyRequestRate); - unhealthySensor = registerSensor("unhealthy_request", new Count()); - tardySensor = registerSensor("tardy_request", new Count(), tardyRequestRate); - 
throttleSensor = registerSensor("throttled_request", new Count()); healthyRequestRateSensor = registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "healthy_request_ratio")); tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); - badRequestSensor = registerSensor("bad_request", new Count()); - CALL_COUNT.getMetricEntity().createMetric(otelRepository); - - errorRetryCountSensor = registerSensor("error_retry", new Count()); - RETRY_COUNT.getMetricEntity().createMetric(otelRepository); - allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); - ALLOWED_RETRY_COUNT.getMetricEntity().createMetric(otelRepository); - disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); - DISALLOWED_RETRY_COUNT.getMetricEntity().createMetric(otelRepository); + requestMetric = new MetricEntityState( + CALL_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.HEALTHY_REQUEST, Arrays.asList(new Count(), healthyRequestRate)); + put(MetricNamesInTehuti.UNHEALTHY_REQUEST, Collections.singletonList(new Count())); + put(MetricNamesInTehuti.TARDY_REQUEST, Arrays.asList(new Count(), tardyRequestRate)); + put(MetricNamesInTehuti.THROTTLED_REQUEST, Collections.singletonList(new Count())); + put(MetricNamesInTehuti.BAD_REQUEST, Collections.singletonList(new Count())); + } + }); + + latencyTehutiSensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0)); + latencyMetric = new MetricEntityState( + CALL_TIME.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put( + MetricNamesInTehuti.HEALTHY_REQUEST_LATENCY, + Arrays.asList( + new Avg(), + new Max(0), + TehutiUtils.getPercentileStatForNetworkLatency( + getName(), + getFullMetricName(MetricNamesInTehuti.HEALTHY_REQUEST_LATENCY)))); + 
put(MetricNamesInTehuti.UNHEALTHY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))); + put(MetricNamesInTehuti.TARDY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))); + put(MetricNamesInTehuti.THROTTLED_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))); + } + }); + + retryCountMetric = new MetricEntityState( + RETRY_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.ERROR_RETRY, Collections.singletonList(new Count())); + } + }); + + allowedRetryCountMetric = new MetricEntityState( + ALLOWED_RETRY_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.ALLOWED_RETRY_REQUEST, Collections.singletonList(new OccurrenceRate())); + } + }); + + disallowedRetryCountMetric = new MetricEntityState( + DISALLOWED_RETRY_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.DISALLOWED_RETRY_REQUEST, Collections.singletonList(new OccurrenceRate())); + } + }); + + retryDelayMetric = new MetricEntityState( + RETRY_DELAY.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.RETRY_DELAY, Arrays.asList(new Avg(), new Max())); + } + }); + + abortedRetryCountMetric = new MetricEntityState( + ABORTED_RETRY_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); + put(MetricNamesInTehuti.SLOW_ROUTE_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); + put(MetricNamesInTehuti.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); + put(MetricNamesInTehuti.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); + } + }); + + keyCountMetric = new MetricEntityState( + CALL_KEY_COUNT.getMetricEntity(), + otelRepository, + 
this::registerSensor, + new HashMap>() { + { + put(MetricNamesInTehuti.KEY_NUM, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))); + put(MetricNamesInTehuti.BAD_REQUEST_KEY_COUNT, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))); + } + }); + errorRetryAttemptTriggeredByPendingRequestCheckSensor = registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); - retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); - RETRY_DELAY.getMetricEntity().createMetric(otelRepository); - - delayConstraintAbortedRetryRequest = registerSensor("delay_constraint_aborted_retry_request", new Count()); - slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); - retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); - noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); - ABORTED_RETRY_COUNT.getMetricEntity().createMetric(otelRepository); unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); requestThrottledByRouterCapacitySensor = registerSensor("request_throttled_by_router_capacity", new Count()); fanoutRequestCountSensor = registerSensor("fanout_request_count", new Avg(), new Max(0)); - latencySensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0)); - healthyRequestLatencySensor = - registerSensorWithDetailedPercentiles("healthy_request_latency", new Avg(), new Max(0)); - unhealthyRequestLatencySensor = registerSensor("unhealthy_request_latency", new Avg(), new Max(0)); - tardyRequestLatencySensor = registerSensor("tardy_request_latency", new Avg(), new Max(0)); - throttledRequestLatencySensor = registerSensor("throttled_request_latency", new Avg(), new Max(0)); - CALL_TIME.getMetricEntity().createMetric(otelRepository); - routerResponseWaitingTimeSensor = registerSensor( 
"response_waiting_time", TehutiUtils.getPercentileStat(getName(), getFullMetricName("response_waiting_time"))); @@ -256,10 +322,6 @@ public RouterHttpRequestStats( (ignored, ignored2) -> scatterGatherStats.getTotalRetriesWinner(), "retry_faster_than_original_count")); - keyNumSensor = registerSensor("key_num", new Avg(), new Max(0)); - badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); - CALL_KEY_COUNT.getMetricEntity().createMetric(otelRepository); - /** * request_usage.Total is incoming KPS while request_usage.OccurrenceRate is QPS */ @@ -309,30 +371,31 @@ private String getDimensionName(VeniceMetricsDimensions dimension) { * types of requests also have their latencies logged at the same time. */ public void recordIncomingRequest() { - incomingRequestSensor.record(); + incomingRequestMetric.record(MetricNamesInTehuti.INCOMING_REQUEST, 1, commonMetricDimensions); inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet()); totalInflightRequestSensor.record(); - INCOMING_CALL_COUNT.getMetricEntity().record(1, commonMetricDimensions); } public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) { - healthySensor.record(); - recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.HEALTHY); + String metricNameInTehuti = MetricNamesInTehuti.HEALTHY_REQUEST; + VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.HEALTHY; + recordRequestMetric(metricNameInTehuti, responseStatus, veniceResponseStatusCategory); if (latency != null) { - healthyRequestLatencySensor.record(latency); - recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.HEALTHY); + recordLatencyMetric(metricNameInTehuti, latency, responseStatus, veniceResponseStatusCategory); } } public void recordUnhealthyRequest(HttpResponseStatus responseStatus) { - unhealthySensor.record(); - recordRequestSensorOtel(responseStatus, 
VeniceResponseStatusCategory.UNHEALTHY); + recordRequestMetric(MetricNamesInTehuti.UNHEALTHY_REQUEST, responseStatus, VeniceResponseStatusCategory.UNHEALTHY); } public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus) { recordUnhealthyRequest(responseStatus); - unhealthyRequestLatencySensor.record(latency); - recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.UNHEALTHY); + recordLatencyMetric( + MetricNamesInTehuti.UNHEALTHY_REQUEST_LATENCY, + latency, + responseStatus, + VeniceResponseStatusCategory.UNHEALTHY); } public void recordUnavailableReplicaStreamingRequest() { @@ -348,16 +411,19 @@ public void recordReadQuotaUsage(int quotaUsage) { } public void recordTardyRequest(double latency, HttpResponseStatus responseStatus) { - tardySensor.record(); - recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.TARDY); - tardyRequestLatencySensor.record(latency); - recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.TARDY); + String metricNameInTehuti = MetricNamesInTehuti.TARDY_REQUEST; + VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.TARDY; + recordRequestMetric(metricNameInTehuti, responseStatus, veniceResponseStatusCategory); + recordLatencyMetric(metricNameInTehuti, latency, responseStatus, veniceResponseStatusCategory); } public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus) { recordThrottledRequest(responseStatus); - throttledRequestLatencySensor.record(latency); - recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.THROTTLED); + recordLatencyMetric( + MetricNamesInTehuti.THROTTLED_REQUEST_LATENCY, + latency, + responseStatus, + VeniceResponseStatusCategory.THROTTLED); } /** @@ -368,45 +434,41 @@ public void recordThrottledRequest(double latency, HttpResponseStatus responseSt * TODO: Remove this overload after fixing the above. 
*/ public void recordThrottledRequest(HttpResponseStatus responseStatus) { - throttleSensor.record(); - recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.THROTTLED); + recordRequestMetric(MetricNamesInTehuti.THROTTLED_REQUEST, responseStatus, VeniceResponseStatusCategory.THROTTLED); } public void recordErrorRetryCount() { - errorRetryCountSensor.record(); - recordRetryTriggeredSensorOtel(VeniceRequestRetryType.ERROR_RETRY); + recordRetryTriggeredSensorOtel(MetricNamesInTehuti.ERROR_RETRY, RequestRetryType.ERROR_RETRY); } - public void recordRetryTriggeredSensorOtel(VeniceRequestRetryType retryType) { + public void recordRetryTriggeredSensorOtel(String tetutiMetricName, RequestRetryType retryType) { + Attributes dimensions = null; if (emitOpenTelemetryMetrics) { - Attributes dimensions = Attributes.builder() + dimensions = Attributes.builder() .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) .build(); - RETRY_COUNT.getMetricEntity().record(1, dimensions); } + retryCountMetric.record(tetutiMetricName, 1, dimensions); } - public void recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason abortReason) { + public void recordAbortedRetrySensorOtel(String tetutiMetricName, RequestRetryAbortReason abortReason) { + Attributes dimensions = null; if (emitOpenTelemetryMetrics) { - Attributes dimensions = Attributes.builder() + dimensions = Attributes.builder() .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason()) .build(); - ABORTED_RETRY_COUNT.getMetricEntity().record(1, dimensions); } + abortedRetryCountMetric.record(tetutiMetricName, 1, dimensions); } public void recordBadRequest(HttpResponseStatus responseStatus) { - badRequestSensor.record(); - recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.BAD_REQUEST); + recordRequestMetric(MetricNamesInTehuti.BAD_REQUEST, responseStatus, 
VeniceResponseStatusCategory.BAD_REQUEST); } public void recordBadRequestKeyCount(int keyCount) { - badRequestKeyCountSensor.record(keyCount); - if (emitOpenTelemetryMetrics) { - recordKeyCountSensorOtel(keyCount, VeniceRequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED); - } + recordKeyCountMetric(keyCount, RequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED); } public void recordRequestThrottledByRouterCapacity() { @@ -420,15 +482,17 @@ public void recordFanoutRequestCount(int count) { } public void recordLatency(double latency) { - latencySensor.record(latency); + latencyTehutiSensor.record(latency); } - public void recordLatencySensorOtel( + public void recordLatencyMetric( + String tehutiMetricName, double latency, HttpResponseStatus responseStatus, VeniceResponseStatusCategory veniceResponseStatusCategory) { + Attributes dimensions = null; if (emitOpenTelemetryMetrics) { - Attributes dimensions = Attributes.builder() + dimensions = Attributes.builder() .putAll(commonMetricDimensions) // only add HTTP_RESPONSE_STATUS_CODE_CATEGORY to reduce the cardinality for histogram .put( @@ -436,15 +500,17 @@ public void recordLatencySensorOtel( getVeniceHttpResponseStatusCodeCategory(responseStatus)) .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .build(); - CALL_TIME.getMetricEntity().record(latency, dimensions); } + latencyMetric.record(tehutiMetricName, latency, dimensions); } - public void recordRequestSensorOtel( + public void recordRequestMetric( + String tetutiMetricName, HttpResponseStatus responseStatus, VeniceResponseStatusCategory veniceResponseStatusCategory) { + Attributes dimensions = null; if (emitOpenTelemetryMetrics) { - Attributes dimensions = Attributes.builder() + dimensions = Attributes.builder() .putAll(commonMetricDimensions) .put( getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), @@ -452,8 +518,8 @@ public void recordRequestSensorOtel( 
.put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) .build(); - CALL_COUNT.getMetricEntity().record(1, dimensions); } + requestMetric.record(tetutiMetricName, 1, dimensions); } public void recordResponseWaitingTime(double waitingTime) { @@ -485,21 +551,18 @@ public void recordFindUnhealthyHostRequest() { } public void recordKeyNum(int keyNum) { - keyNumSensor.record(keyNum); - if (emitOpenTelemetryMetrics) { - recordKeyCountSensorOtel(keyNum, VeniceRequestValidationOutcome.VALID); - } + recordKeyCountMetric(keyNum, RequestValidationOutcome.VALID); } - public void recordKeyCountSensorOtel(int keyNum, VeniceRequestValidationOutcome outcome) { - keyNumSensor.record(keyNum); + public void recordKeyCountMetric(int keyNum, RequestValidationOutcome outcome) { + Attributes dimensions = null; if (emitOpenTelemetryMetrics) { - Attributes dimensions = Attributes.builder() + dimensions = Attributes.builder() .putAll(commonMetricDimensions) .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome()) .build(); - CALL_KEY_COUNT.getMetricEntity().record(keyNum, dimensions); } + keyCountMetric.record(MetricNamesInTehuti.KEY_NUM, keyNum, dimensions); } public void recordRequestUsage(int usage) { @@ -523,23 +586,27 @@ public void recordUnavailableRequest() { } public void recordDelayConstraintAbortedRetryRequest() { - delayConstraintAbortedRetryRequest.record(); - recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_DELAY_CONSTRAINT); + recordAbortedRetrySensorOtel( + MetricNamesInTehuti.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, + RequestRetryAbortReason.DELAY_CONSTRAINT); } public void recordSlowRouteAbortedRetryRequest() { - slowRouteAbortedRetryRequest.record(); - recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_SLOW_ROUTE); + recordAbortedRetrySensorOtel( + 
MetricNamesInTehuti.SLOW_ROUTE_ABORTED_RETRY_REQUEST, + RequestRetryAbortReason.SLOW_ROUTE); } public void recordRetryRouteLimitAbortedRetryRequest() { - retryRouteLimitAbortedRetryRequest.record(); - recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT); + recordAbortedRetrySensorOtel( + MetricNamesInTehuti.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, + RequestRetryAbortReason.MAX_RETRY_ROUTE_LIMIT); } public void recordNoAvailableReplicaAbortedRetryRequest() { - noAvailableReplicaAbortedRetryRequest.record(); - recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA); + recordAbortedRetrySensorOtel( + MetricNamesInTehuti.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, + RequestRetryAbortReason.NO_AVAILABLE_REPLICA); } public void recordKeySizeInByte(long keySize) { @@ -558,13 +625,11 @@ public void recordResponse() { } public void recordAllowedRetryRequest() { - allowedRetryRequestSensor.record(); - ALLOWED_RETRY_COUNT.getMetricEntity().record(1, commonMetricDimensions); + allowedRetryCountMetric.record(MetricNamesInTehuti.ALLOWED_RETRY_REQUEST, 1, commonMetricDimensions); } public void recordDisallowedRetryRequest() { - disallowedRetryRequestSensor.record(); - DISALLOWED_RETRY_COUNT.getMetricEntity().record(1, commonMetricDimensions); + disallowedRetryCountMetric.record(MetricNamesInTehuti.DISALLOWED_RETRY_REQUEST, 1, commonMetricDimensions); } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { @@ -572,8 +637,7 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { } public void recordRetryDelay(double delay) { - retryDelaySensor.record(delay); - RETRY_DELAY.getMetricEntity().record(delay, commonMetricDimensions); + retryDelayMetric.record(MetricNamesInTehuti.RETRY_DELAY, delay, commonMetricDimensions); } public void recordMetaStoreShadowRead() { @@ -590,4 +654,48 @@ static public boolean hasInFlightRequests() { // max return -infinity when there 
are no samples. validate only against finite value return Double.isFinite(metric.value()) ? metric.value() > 0.0 : false; } + + /** + * Metric names for tehuti metrics used in this class + */ + private static class MetricNamesInTehuti { + /** for {@link RouterMetricEntity#INCOMING_CALL_COUNT} */ + private final static String INCOMING_REQUEST = "request"; + + /** for {@link RouterMetricEntity#CALL_COUNT} */ + private final static String HEALTHY_REQUEST = "healthy_request"; + private final static String UNHEALTHY_REQUEST = "unhealthy_request"; + private final static String TARDY_REQUEST = "tardy_request"; + private final static String THROTTLED_REQUEST = "throttled_request"; + private final static String BAD_REQUEST = "bad_request"; + + /** for {@link RouterMetricEntity#CALL_TIME} */ + private final static String HEALTHY_REQUEST_LATENCY = "healthy_request_latency"; + private final static String UNHEALTHY_REQUEST_LATENCY = "unhealthy_request_latency"; + private final static String TARDY_REQUEST_LATENCY = "tardy_request_latency"; + private final static String THROTTLED_REQUEST_LATENCY = "throttled_request_latency"; + + /** for {@link RouterMetricEntity#RETRY_COUNT} */ + private final static String ERROR_RETRY = "error_retry"; + + /** for {@link RouterMetricEntity#ALLOWED_RETRY_COUNT} */ + private final static String ALLOWED_RETRY_REQUEST = "allowed_retry_request_count"; + + /** for {@link RouterMetricEntity#DISALLOWED_RETRY_COUNT} */ + private final static String DISALLOWED_RETRY_REQUEST = "disallowed_retry_request_count"; + + /** for {@link RouterMetricEntity#RETRY_DELAY} */ + private final static String RETRY_DELAY = "retry_delay"; + + /** for {@link RouterMetricEntity#ABORTED_RETRY_COUNT} */ + private final static String DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST = "delay_constraint_aborted_retry_request"; + private final static String SLOW_ROUTE_ABORTED_RETRY_REQUEST = "slow_route_aborted_retry_request"; + private final static String 
RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST = "retry_route_limit_aborted_retry_request"; + private final static String NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST = + "no_available_replica_aborted_retry_request"; + + /** for {@link RouterMetricEntity#CALL_KEY_COUNT} */ + private final static String KEY_NUM = "key_num"; + private final static String BAD_REQUEST_KEY_COUNT = "bad_request_key_count"; + } } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java similarity index 92% rename from services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java rename to services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java index e58dbd6169..f542e6dc83 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntities.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterMetricEntity.java @@ -12,7 +12,6 @@ import static com.linkedin.venice.utils.Utils.setOf; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; -import com.linkedin.venice.stats.metrics.MetricEntities; import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.stats.metrics.MetricType; import com.linkedin.venice.stats.metrics.MetricUnit; @@ -22,7 +21,7 @@ /** * List all Metric entities for router */ -public enum RouterMetricEntities implements MetricEntities { +public enum RouterMetricEntity { INCOMING_CALL_COUNT( "incoming_call_count", MetricType.COUNTER, MetricUnit.NUMBER, "Count of all incoming requests", setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) @@ -38,7 +37,7 @@ public enum RouterMetricEntities implements MetricEntities { VENICE_RESPONSE_STATUS_CODE_CATEGORY) ), CALL_TIME( - "call_time", MetricType.HISTOGRAM, MetricUnit.MILLISECONDS, "Latency based on all 
responses", + "call_time", MetricType.HISTOGRAM, MetricUnit.MILLISECOND, "Latency based on all responses", setOf( VENICE_STORE_NAME, VENICE_CLUSTER_NAME, @@ -64,7 +63,7 @@ public enum RouterMetricEntities implements MetricEntities { setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) ), RETRY_DELAY( - "retry_delay", MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.MILLISECONDS, "Retry delay time", + "retry_delay", MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, MetricUnit.MILLISECOND, "Retry delay time", setOf(VENICE_STORE_NAME, VENICE_CLUSTER_NAME, VENICE_REQUEST_METHOD) ), ABORTED_RETRY_COUNT( @@ -74,7 +73,7 @@ public enum RouterMetricEntities implements MetricEntities { private final MetricEntity metricEntity; - RouterMetricEntities( + RouterMetricEntity( String metricName, MetricType metricType, MetricUnit unit, @@ -83,7 +82,6 @@ public enum RouterMetricEntities implements MetricEntities { this.metricEntity = new MetricEntity(metricName, metricType, unit, description, dimensionsList); } - @Override public MetricEntity getMetricEntity() { return metricEntity; } From 81a90abc7c8e11b6bff8a469e357f441ea7f2377 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Sun, 24 Nov 2024 09:35:56 -0800 Subject: [PATCH 11/19] fix spotbugs and add some unit tests --- .../venice/stats/VeniceMetricsConfig.java | 3 +- .../venice/stats/VeniceMetricsRepository.java | 4 +- .../VeniceOpenTelemetryMetricsRepository.java | 5 +- .../stats/metrics/MetricEntityState.java | 42 +++---- ...iceOpenTelemetryMetricsRepositoryTest.java | 7 +- .../stats/metrics/MetricEntityStateTest.java | 116 ++++++++++++++++++ .../stats/metrics/MetricEntityTest.java | 55 +++++++++ .../router/stats/RouterHttpRequestStats.java | 26 ++-- 8 files changed, 210 insertions(+), 48 deletions(-) create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java create mode 100644 
internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityTest.java diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index 92ae668355..ef78e1171e 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -5,6 +5,7 @@ import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; import io.opentelemetry.sdk.metrics.export.MetricExporter; import io.tehuti.metrics.MetricConfig; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Locale; @@ -174,7 +175,7 @@ private VeniceMetricsConfig(Builder builder) { public static class Builder { private String serviceName = "default_service"; private String metricPrefix = null; - private Collection metricEntities; + private Collection metricEntities = new ArrayList<>(); private boolean emitOtelMetrics = false; private boolean exportOtelMetricsToEndpoint = false; private Map otelCustomDimensionsMap = new HashMap<>(); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index aefdba6dec..679360bcc1 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -13,8 +13,8 @@ * Next step would be to create a MetricsRepository inside rather than extending it */ public class VeniceMetricsRepository extends MetricsRepository implements Closeable { - private VeniceMetricsConfig veniceMetricsConfig; - private 
VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; + private final VeniceMetricsConfig veniceMetricsConfig; + private final VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; public VeniceMetricsRepository() { super(); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 753bc07e94..80a2e67a9d 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -195,7 +195,8 @@ public LongCounter createCounter(MetricEntity metricEntity) { } public Object createInstrument(MetricEntity metricEntity) { - switch (metricEntity.getMetricType()) { + MetricType metricType = metricEntity.getMetricType(); + switch (metricType) { case HISTOGRAM: case MIN_MAX_COUNT_SUM_AGGREGATIONS: return createHistogram(metricEntity); @@ -204,7 +205,7 @@ public Object createInstrument(MetricEntity metricEntity) { return createCounter(metricEntity); default: - throw new VeniceException("Unknown metric type: " + metricEntity.getMetricType()); + throw new VeniceException("Unknown metric type: " + metricType); } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java index 959fa3e405..6d12e2af17 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java @@ -6,20 +6,19 @@ import io.opentelemetry.api.metrics.LongCounter; import io.tehuti.metrics.MeasurableStat; import 
io.tehuti.metrics.Sensor; -import io.tehuti.metrics.stats.Percentiles; import java.util.HashMap; import java.util.List; import java.util.Map; /** - * Holds {@link MetricEntity} and the operation state for the metric + * Holds {@link MetricEntity} and 1 Otel metric and its corresponding multiple tehuti Sensors */ public class MetricEntityState { private MetricEntity metricEntity; - // otel metric + // Otel metric private Object otelMetric = null; - // Map of tehuti names and sensors: 1 otel metric can cover multiple tehuti sensors + // Map of tehuti names and sensors: 1 Otel metric can cover multiple Tehuti sensors private Map tehutiSensors = null; public MetricEntityState(MetricEntity metricEntity, VeniceOpenTelemetryMetricsRepository otelRepository) { @@ -36,21 +35,12 @@ public MetricEntityState( createMetric(otelRepository, tehutiMetricInput, registerTehutiSensor); } - // setters - public void setMetricEntity(MetricEntity metricEntity) { - this.metricEntity = metricEntity; - } - public void setOtelMetric(Object otelMetric) { this.otelMetric = otelMetric; } - public void setTehutiSensors(Map tehutiSensors) { - this.tehutiSensors = tehutiSensors; - } - /** - * Add tehuti sensor to tehutiSensors map and throw exception if sensor with same name already exists + * Add Tehuti {@link Sensor} to tehutiSensors map and throw exception if sensor with same name already exists */ public void addTehutiSensors(String name, Sensor tehutiSensor) { if (tehutiSensors == null) { @@ -73,30 +63,22 @@ public void createMetric( VeniceOpenTelemetryMetricsRepository otelRepository, Map> tehutiMetricInput, TehutiSensorRegistrationFunction registerTehutiSensor) { - // Otel metric + // Otel metric: otelRepository will be null if otel is not enabled if (otelRepository != null) { setOtelMetric(otelRepository.createInstrument(this.metricEntity)); } // tehuti metric - // loop through tehutiMetricInput and call registerTehutiSensor for each String, List pair for (Map.Entry> entry: 
tehutiMetricInput.entrySet()) { - if (entry.getValue().contains(Percentiles.class)) { - addTehutiSensors( - entry.getKey(), - registerTehutiSensor.register(entry.getKey(), entry.getValue().toArray(new MeasurableStat[0]))); - - } else { - addTehutiSensors( - entry.getKey(), - registerTehutiSensor.register(entry.getKey(), entry.getValue().toArray(new MeasurableStat[0]))); - } + addTehutiSensors( + entry.getKey(), + registerTehutiSensor.register(entry.getKey(), entry.getValue().toArray(new MeasurableStat[0]))); } } /** * Record otel metrics */ - private void recordOtelMetric(double value, Attributes otelDimensions) { + void recordOtelMetric(double value, Attributes otelDimensions) { if (otelMetric != null) { MetricType metricType = this.metricEntity.getMetricType(); switch (metricType) { @@ -114,7 +96,7 @@ private void recordOtelMetric(double value, Attributes otelDimensions) { } } - private void recordTehutiMetric(String tehutiMetricName, double value) { + void recordTehutiMetric(String tehutiMetricName, double value) { if (tehutiSensors != null) { Sensor sensor = tehutiSensors.get(tehutiMetricName); if (sensor != null) { @@ -132,4 +114,8 @@ public void record(String tehutiMetricName, double value, Attributes otelDimensi recordOtelMetric(value, otelDimensions); recordTehutiMetric(tehutiMetricName, value); } + + Map getTehutiSensors() { + return tehutiSensors; + } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java index 89456e3492..7d24139d10 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -1,6 +1,5 @@ package com.linkedin.venice.stats; -import static 
com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.transformMetricName; import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.validateMetricName; import static org.testng.Assert.assertEquals; @@ -14,7 +13,6 @@ import io.opentelemetry.api.metrics.DoubleHistogram; import io.opentelemetry.api.metrics.LongCounter; import io.opentelemetry.sdk.metrics.export.MetricExporter; -import java.util.HashMap; import org.mockito.Mockito; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -69,12 +67,9 @@ public void testConstructorWithEmitDisabled() { @Test public void testGetOtlpHttpMetricExporterWithValidConfig() { - HashMap otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost:4318"); - MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); - // Verify that the exporter is not null and is of the expected type + // Verify that the exporter is not null assertNotNull(exporter); } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java new file mode 100644 index 0000000000..3ce89cbe55 --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java @@ -0,0 +1,116 @@ +package com.linkedin.venice.stats.metrics; + +import static org.mockito.Mockito.*; + +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; +import io.tehuti.metrics.MeasurableStat; +import io.tehuti.metrics.Sensor; +import java.util.*; +import org.testng.Assert; +import 
org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +public class MetricEntityStateTest { + private VeniceOpenTelemetryMetricsRepository mockOtelRepository; + private MetricEntity mockMetricEntity; + private MetricEntityState.TehutiSensorRegistrationFunction sensorRegistrationFunction; + private Sensor mockSensor; + + @BeforeMethod + public void setUp() { + mockOtelRepository = mock(VeniceOpenTelemetryMetricsRepository.class); + mockMetricEntity = mock(MetricEntity.class); + sensorRegistrationFunction = (name, stats) -> mock(Sensor.class); + mockSensor = mock(Sensor.class); + } + + @Test + public void testCreateMetricWithOtelEnabled() { + when(mockMetricEntity.getMetricType()).thenReturn(MetricType.COUNTER); + LongCounter longCounter = mock(LongCounter.class); + when(mockOtelRepository.createInstrument(mockMetricEntity)).thenReturn(longCounter); + + Map> tehutiMetricInput = new HashMap<>(); + MetricEntityState metricEntityState = + new MetricEntityState(mockMetricEntity, mockOtelRepository, sensorRegistrationFunction, tehutiMetricInput); + + Assert.assertNotNull(metricEntityState); + Assert.assertNull(metricEntityState.getTehutiSensors()); // No Tehuti sensors added + } + + @Test + public void testAddTehutiSensorsSuccessfully() { + MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); + metricEntityState.addTehutiSensors("testSensor", mockSensor); + + Assert.assertNotNull(metricEntityState.getTehutiSensors()); + Assert.assertTrue(metricEntityState.getTehutiSensors().containsKey("testSensor")); + } + + @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*Sensor with name 'testSensor' already exists.*") + public void testAddTehutiSensorThrowsExceptionOnDuplicate() { + MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); + metricEntityState.addTehutiSensors("testSensor", mockSensor); + + // Adding the same 
sensor name again should throw an exception + metricEntityState.addTehutiSensors("testSensor", mockSensor); + } + + @Test + public void testRecordOtelMetricHistogram() { + DoubleHistogram doubleHistogram = mock(DoubleHistogram.class); + when(mockMetricEntity.getMetricType()).thenReturn(MetricType.HISTOGRAM); + + MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); + metricEntityState.setOtelMetric(doubleHistogram); + + Attributes attributes = Attributes.builder().put("key", "value").build(); + metricEntityState.recordOtelMetric(5.5, attributes); + + verify(doubleHistogram, times(1)).record(5.5, attributes); + } + + @Test + public void testRecordOtelMetricCounter() { + LongCounter longCounter = mock(LongCounter.class); + when(mockMetricEntity.getMetricType()).thenReturn(MetricType.COUNTER); + + MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); + metricEntityState.setOtelMetric(longCounter); + + Attributes attributes = Attributes.builder().put("key", "value").build(); + metricEntityState.recordOtelMetric(10, attributes); + + verify(longCounter, times(1)).add(10, attributes); + } + + @Test + public void testRecordTehutiMetric() { + MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); + metricEntityState.addTehutiSensors("testSensor", mockSensor); + + metricEntityState.recordTehutiMetric("testSensor", 15.0); + + verify(mockSensor, times(1)).record(15.0); + } + + @Test + public void testRecordMetricsWithBothOtelAndTehuti() { + DoubleHistogram doubleHistogram = mock(DoubleHistogram.class); + when(mockMetricEntity.getMetricType()).thenReturn(MetricType.HISTOGRAM); + + MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); + metricEntityState.setOtelMetric(doubleHistogram); + metricEntityState.addTehutiSensors("testSensor", mockSensor); + + Attributes attributes = 
Attributes.builder().put("key", "value").build(); + metricEntityState.record("testSensor", 20.0, attributes); + + verify(doubleHistogram, times(1)).record(20.0, attributes); + verify(mockSensor, times(1)).record(20.0); + } +} diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityTest.java new file mode 100644 index 0000000000..f1fe09ff8d --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityTest.java @@ -0,0 +1,55 @@ +package com.linkedin.venice.stats.metrics; + +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; + +import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import java.util.HashSet; +import java.util.Set; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class MetricEntityTest { + @Test + public void testMetricEntityConstructorWithoutDimensions() { + String metricName = "testMetric"; + MetricType metricType = MetricType.COUNTER; + MetricUnit unit = MetricUnit.MILLISECOND; + String description = "Test description"; + + MetricEntity metricEntity = new MetricEntity(metricName, metricType, unit, description); + + Assert.assertEquals(metricEntity.getMetricName(), metricName, "Metric name should match"); + Assert.assertEquals(metricEntity.getMetricType(), metricType, "Metric type should match"); + Assert.assertEquals(metricEntity.getUnit(), unit, "Metric unit should match"); + Assert.assertEquals(metricEntity.getDescription(), description, "Description should match"); + Assert.assertNull(metricEntity.getDimensionsList(), "Dimensions list should be null"); + } + + @Test + public void testMetricEntityConstructorWithDimensions() { + String metricName = "testMetric"; + MetricType 
metricType = MetricType.COUNTER; + MetricUnit unit = MetricUnit.MILLISECOND; + String description = "Test description with dimensions"; + + Set dimensions = new HashSet<>(); + dimensions.add(VENICE_STORE_NAME); + dimensions.add(VENICE_CLUSTER_NAME); + + MetricEntity metricEntity = new MetricEntity(metricName, metricType, unit, description, dimensions); + + Assert.assertEquals(metricEntity.getMetricName(), metricName, "Metric name should match"); + Assert.assertEquals(metricEntity.getMetricType(), metricType, "Metric type should match"); + Assert.assertEquals(metricEntity.getUnit(), unit, "Metric unit should match"); + Assert.assertEquals(metricEntity.getDescription(), description, "Description should match"); + Assert.assertNotNull(metricEntity.getDimensionsList(), "Dimensions list should not be null"); + Assert.assertEquals(metricEntity.getDimensionsList(), dimensions, "Dimensions list should match"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMetricEntityConstructorWithEmptyName() { + new MetricEntity("", MetricType.COUNTER, MetricUnit.MILLISECOND, "Empty name test"); + } +} diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 2c8c04b5d2..b3daca9f08 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -172,7 +172,7 @@ public RouterHttpRequestStats( incomingRequestMetric = new MetricEntityState( INCOMING_CALL_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.INCOMING_REQUEST, Arrays.asList(new Count(), requestRate)); @@ -186,7 +186,7 @@ public RouterHttpRequestStats( requestMetric = new MetricEntityState( 
CALL_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.HEALTHY_REQUEST, Arrays.asList(new Count(), healthyRequestRate)); @@ -201,7 +201,7 @@ public RouterHttpRequestStats( latencyMetric = new MetricEntityState( CALL_TIME.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put( @@ -221,7 +221,7 @@ public RouterHttpRequestStats( retryCountMetric = new MetricEntityState( RETRY_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.ERROR_RETRY, Collections.singletonList(new Count())); @@ -231,7 +231,7 @@ public RouterHttpRequestStats( allowedRetryCountMetric = new MetricEntityState( ALLOWED_RETRY_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.ALLOWED_RETRY_REQUEST, Collections.singletonList(new OccurrenceRate())); @@ -241,7 +241,7 @@ public RouterHttpRequestStats( disallowedRetryCountMetric = new MetricEntityState( DISALLOWED_RETRY_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.DISALLOWED_RETRY_REQUEST, Collections.singletonList(new OccurrenceRate())); @@ -251,7 +251,7 @@ public RouterHttpRequestStats( retryDelayMetric = new MetricEntityState( RETRY_DELAY.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.RETRY_DELAY, Arrays.asList(new Avg(), new Max())); @@ -261,7 +261,7 @@ public RouterHttpRequestStats( abortedRetryCountMetric = new MetricEntityState( ABORTED_RETRY_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); @@ -274,7 
+274,7 @@ public RouterHttpRequestStats( keyCountMetric = new MetricEntityState( CALL_KEY_COUNT.getMetricEntity(), otelRepository, - this::registerSensor, + this::registerSensorFinal, new HashMap>() { { put(MetricNamesInTehuti.KEY_NUM, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))); @@ -649,6 +649,14 @@ protected Sensor registerSensor(String sensorName, MeasurableStat... stats) { return super.registerSensor(systemStoreName == null ? sensorName : systemStoreName, null, stats); } + /** + * This method will be passed to the constructor of {@link MetricEntityState} to register tehuti sensor. + * Only private/static/final methods can be passed onto the constructor. + */ + private Sensor registerSensorFinal(String sensorName, MeasurableStat... stats) { + return this.registerSensor(sensorName, stats); + } + static public boolean hasInFlightRequests() { Metric metric = localMetricRepo.getMetric("total_inflight_request_count"); // max return -infinity when there are no samples. validate only against finite value From 3b13435839ed95c2fa8cca2028fb8caf775443b9 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Mon, 25 Nov 2024 02:54:16 -0800 Subject: [PATCH 12/19] add some unit tests --- .../stats/metrics/MetricEntityStateTest.java | 9 +- .../router/stats/RouterHttpRequestStats.java | 16 +- .../router/stats/RouterMetricEntityTest.java | 184 ++++++++++++++++++ 3 files changed, 199 insertions(+), 10 deletions(-) create mode 100644 services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java index 3ce89cbe55..d67000af57 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java +++ 
b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java @@ -1,6 +1,9 @@ package com.linkedin.venice.stats.metrics; -import static org.mockito.Mockito.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; import io.opentelemetry.api.common.Attributes; @@ -8,7 +11,9 @@ import io.opentelemetry.api.metrics.LongCounter; import io.tehuti.metrics.MeasurableStat; import io.tehuti.metrics.Sensor; -import java.util.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index b3daca9f08..0d01e6623d 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -438,10 +438,10 @@ public void recordThrottledRequest(HttpResponseStatus responseStatus) { } public void recordErrorRetryCount() { - recordRetryTriggeredSensorOtel(MetricNamesInTehuti.ERROR_RETRY, RequestRetryType.ERROR_RETRY); + recordRetryTriggeredSensorOtel(RequestRetryType.ERROR_RETRY); } - public void recordRetryTriggeredSensorOtel(String tetutiMetricName, RequestRetryType retryType) { + public void recordRetryTriggeredSensorOtel(RequestRetryType retryType) { Attributes dimensions = null; if (emitOpenTelemetryMetrics) { dimensions = Attributes.builder() @@ -449,10 +449,10 @@ public void recordRetryTriggeredSensorOtel(String tetutiMetricName, RequestRetry 
.put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) .build(); } - retryCountMetric.record(tetutiMetricName, 1, dimensions); + retryCountMetric.record(MetricNamesInTehuti.ERROR_RETRY, 1, dimensions); } - public void recordAbortedRetrySensorOtel(String tetutiMetricName, RequestRetryAbortReason abortReason) { + public void recordAbortedRetrySensorOtel(String tehutiMetricName, RequestRetryAbortReason abortReason) { Attributes dimensions = null; if (emitOpenTelemetryMetrics) { dimensions = Attributes.builder() @@ -460,7 +460,7 @@ public void recordAbortedRetrySensorOtel(String tetutiMetricName, RequestRetryAb .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason()) .build(); } - abortedRetryCountMetric.record(tetutiMetricName, 1, dimensions); + abortedRetryCountMetric.record(tehutiMetricName, 1, dimensions); } public void recordBadRequest(HttpResponseStatus responseStatus) { @@ -494,7 +494,7 @@ public void recordLatencyMetric( if (emitOpenTelemetryMetrics) { dimensions = Attributes.builder() .putAll(commonMetricDimensions) - // only add HTTP_RESPONSE_STATUS_CODE_CATEGORY to reduce the cardinality for histogram + // Don't add HTTP_RESPONSE_STATUS_CODE to reduce the cardinality for histogram .put( getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), getVeniceHttpResponseStatusCodeCategory(responseStatus)) @@ -505,7 +505,7 @@ public void recordLatencyMetric( } public void recordRequestMetric( - String tetutiMetricName, + String tehutiMetricName, HttpResponseStatus responseStatus, VeniceResponseStatusCategory veniceResponseStatusCategory) { Attributes dimensions = null; @@ -519,7 +519,7 @@ public void recordRequestMetric( .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) .build(); } - requestMetric.record(tetutiMetricName, 1, dimensions); + requestMetric.record(tehutiMetricName, 1, dimensions); } public void recordResponseWaitingTime(double waitingTime) { diff --git 
a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java new file mode 100644 index 0000000000..8755ba2aeb --- /dev/null +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/RouterMetricEntityTest.java @@ -0,0 +1,184 @@ +package com.linkedin.venice.router.stats; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.linkedin.venice.router.RouterServer; +import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import com.linkedin.venice.stats.metrics.MetricEntity; +import com.linkedin.venice.stats.metrics.MetricType; +import com.linkedin.venice.stats.metrics.MetricUnit; +import com.linkedin.venice.utils.Utils; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.testng.annotations.Test; + + +public class RouterMetricEntityTest { + @Test + public void testRouterMetricEntities() { + Map expectedMetrics = new HashMap<>(); + + expectedMetrics.put( + RouterMetricEntity.INCOMING_CALL_COUNT, + new MetricEntity( + "incoming_call_count", + MetricType.COUNTER, + MetricUnit.NUMBER, + "Count of all incoming requests", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD))); + expectedMetrics.put( + RouterMetricEntity.CALL_COUNT, + new MetricEntity( + "call_count", + MetricType.COUNTER, + MetricUnit.NUMBER, + "Count of all requests with response details", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD, + VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE, + VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY, + 
VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY))); + expectedMetrics.put( + RouterMetricEntity.CALL_TIME, + new MetricEntity( + "call_time", + MetricType.HISTOGRAM, + MetricUnit.MILLISECOND, + "Latency based on all responses", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD, + VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY))); + expectedMetrics.put( + RouterMetricEntity.CALL_KEY_COUNT, + new MetricEntity( + "call_key_count", + MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, + MetricUnit.NUMBER, + "Count of keys in multi key requests", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD, + VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME))); + expectedMetrics.put( + RouterMetricEntity.RETRY_COUNT, + new MetricEntity( + "retry_count", + MetricType.COUNTER, + MetricUnit.NUMBER, + "Count of retries triggered", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD, + VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE))); + expectedMetrics.put( + RouterMetricEntity.ALLOWED_RETRY_COUNT, + new MetricEntity( + "allowed_retry_count", + MetricType.COUNTER, + MetricUnit.NUMBER, + "Count of allowed retry requests", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD))); + expectedMetrics.put( + RouterMetricEntity.DISALLOWED_RETRY_COUNT, + new MetricEntity( + "disallowed_retry_count", + MetricType.COUNTER, + MetricUnit.NUMBER, + "Count of disallowed retry requests", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + 
VeniceMetricsDimensions.VENICE_REQUEST_METHOD))); + expectedMetrics.put( + RouterMetricEntity.RETRY_DELAY, + new MetricEntity( + "retry_delay", + MetricType.MIN_MAX_COUNT_SUM_AGGREGATIONS, + MetricUnit.MILLISECOND, + "Retry delay time", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD))); + expectedMetrics.put( + RouterMetricEntity.ABORTED_RETRY_COUNT, + new MetricEntity( + "aborted_retry_count", + MetricType.COUNTER, + MetricUnit.NUMBER, + "Count of aborted retry requests", + Utils.setOf( + VeniceMetricsDimensions.VENICE_STORE_NAME, + VeniceMetricsDimensions.VENICE_CLUSTER_NAME, + VeniceMetricsDimensions.VENICE_REQUEST_METHOD, + VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON))); + + for (RouterMetricEntity metric: RouterMetricEntity.values()) { + MetricEntity actual = metric.getMetricEntity(); + MetricEntity expected = expectedMetrics.get(metric); + + assertNotNull(expected, "No expected definition for " + metric.name()); + assertNotNull(actual.getMetricName(), "Metric name should not be null for " + metric.name()); + assertEquals(actual.getMetricName(), expected.getMetricName(), "Unexpected metric name for " + metric.name()); + assertNotNull(actual.getMetricType(), "Metric type should not be null for " + metric.name()); + assertEquals(actual.getMetricType(), expected.getMetricType(), "Unexpected metric type for " + metric.name()); + assertNotNull(actual.getUnit(), "Metric unit should not be null for " + metric.name()); + assertEquals(actual.getUnit(), expected.getUnit(), "Unexpected metric unit for " + metric.name()); + assertNotNull(actual.getDescription(), "Metric description should not be null for " + metric.name()); + assertEquals( + actual.getDescription(), + expected.getDescription(), + "Unexpected metric description for " + metric.name()); + assertNotNull(actual.getDimensionsList(), "Metric dimensions should not be null for " + 
metric.name()); + assertEquals( + actual.getDimensionsList(), + expected.getDimensionsList(), + "Unexpected metric dimensions for " + metric.name()); + } + + // Convert expectedMetrics to a Collection for comparison + Collection expectedMetricEntities = expectedMetrics.values(); + + // Assert size + assertEquals( + RouterServer.ROUTER_SERVICE_METRIC_ENTITIES.size(), + expectedMetricEntities.size(), + "Unexpected size of ROUTER_SERVICE_METRIC_ENTITIES"); + + // Assert contents + for (MetricEntity actual: RouterServer.ROUTER_SERVICE_METRIC_ENTITIES) { + boolean found = false; + for (MetricEntity expected: expectedMetricEntities) { + if (metricEntitiesEqual(actual, expected)) { + found = true; + break; + } + } + assertTrue(found, "Unexpected MetricEntity found: " + actual.getMetricName()); + } + } + + private boolean metricEntitiesEqual(MetricEntity actual, MetricEntity expected) { + return Objects.equals(actual.getMetricName(), expected.getMetricName()) + && actual.getMetricType() == expected.getMetricType() && actual.getUnit() == expected.getUnit() + && Objects.equals(actual.getDescription(), expected.getDescription()) + && Objects.equals(actual.getDimensionsList(), expected.getDimensionsList()); + } +} From 71a36e8fca84a26333ef20382fe26382ed6946ec Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Mon, 25 Nov 2024 10:38:04 -0800 Subject: [PATCH 13/19] fix compile error after rebase --- .../venice/listener/ReadQuotaEnforcementHandlerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/venice-server/src/test/java/com/linkedin/venice/listener/ReadQuotaEnforcementHandlerTest.java b/services/venice-server/src/test/java/com/linkedin/venice/listener/ReadQuotaEnforcementHandlerTest.java index 1cae4ef4f3..0bd9b7e893 100644 --- a/services/venice-server/src/test/java/com/linkedin/venice/listener/ReadQuotaEnforcementHandlerTest.java +++ b/services/venice-server/src/test/java/com/linkedin/venice/listener/ReadQuotaEnforcementHandlerTest.java @@ 
-373,7 +373,7 @@ public void testInitWithPreExistingResource() { storeRepository, CompletableFuture.completedFuture(customizedViewRepository), thisNodeId, - new AggServerQuotaUsageStats(metricsRepository), + new AggServerQuotaUsageStats(serverConfig.getClusterName(), metricsRepository), metricsRepository, clock); String storeName = "testStore"; From 468a673801e693df3fb1678315a6412655e6f03e Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Mon, 25 Nov 2024 12:33:48 -0800 Subject: [PATCH 14/19] address review comments --- .../venice/stats/AbstractVeniceAggStats.java | 22 +++++++---- .../stats/AbstractVeniceAggStatsTest.java | 37 +++++++++++++++++++ 2 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/AbstractVeniceAggStatsTest.java diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java index b6d852f6b8..82fb1905c3 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java @@ -36,20 +36,26 @@ public void setStatsSupplier(StatsSupplier statsSupplier) { this.totalStats = statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null); } + /** + * clusterName is used to create per cluster aggregate stats and {@link com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions}
+ * If perClusterAggregate is true, it will create per cluster aggregates with storeName as "total." + */ public AbstractVeniceAggStats( String clusterName, MetricsRepository metricsRepository, StatsSupplier statsSupplier, boolean perClusterAggregate) { - this( - clusterName, + if (perClusterAggregate && clusterName == null) { + throw new IllegalArgumentException("perClusterAggregate cannot be true when clusterName is null"); + } + this.clusterName = clusterName; + this.metricsRepository = metricsRepository; + this.statsFactory = statsSupplier; + this.totalStats = statsSupplier.get( metricsRepository, - statsSupplier, - statsSupplier.get( - metricsRepository, - perClusterAggregate ? STORE_NAME_FOR_TOTAL_STAT + "." + clusterName : STORE_NAME_FOR_TOTAL_STAT, - clusterName, - null)); + perClusterAggregate ? STORE_NAME_FOR_TOTAL_STAT + "." + clusterName : STORE_NAME_FOR_TOTAL_STAT, + clusterName, + null); } public T getStoreStats(String storeName) { diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/AbstractVeniceAggStatsTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/AbstractVeniceAggStatsTest.java new file mode 100644 index 0000000000..98d3721600 --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/AbstractVeniceAggStatsTest.java @@ -0,0 +1,37 @@ +package com.linkedin.venice.stats; + +import static org.mockito.Mockito.mock; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; + +import com.linkedin.venice.client.stats.ClientStats; +import io.tehuti.metrics.MetricsRepository; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class AbstractVeniceAggStatsTest { + @DataProvider(name = "ClusterName-And-Boolean") + public Object[][] fcRequestTypes() { + return new Object[][] { { null, false }, { null, true }, { "test-cluster", false }, { "test-cluster", true } }; + } + + @Test(dataProvider = 
"ClusterName-And-Boolean") + public void abstractVeniceAggStatsWithNoClusterName(String clusterName, boolean perClusterAggregate) { + MetricsRepository metricsRepository = mock(MetricsRepository.class); + StatsSupplier statsSupplier = mock(StatsSupplier.class); + try { + new AbstractVeniceAggStats(clusterName, metricsRepository, statsSupplier, perClusterAggregate) { + }; + if (clusterName == null && perClusterAggregate) { + fail("Expected IllegalArgumentException"); + } + } catch (IllegalArgumentException e) { + if (clusterName == null && perClusterAggregate) { + assertEquals(e.getMessage(), "perClusterAggregate cannot be true when clusterName is null"); + } else { + fail("IllegalArgumentException not expected"); + } + } + } +} From d121c652b2e072c80c170425233e398cf47e266a Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Mon, 25 Nov 2024 14:24:51 -0800 Subject: [PATCH 15/19] address review comments --- .../dimensions/RequestRetryAbortReason.java | 7 +- .../stats/dimensions/RequestRetryType.java | 6 +- .../dimensions/RequestValidationOutcome.java | 6 +- .../VeniceResponseStatusCategory.java | 6 +- .../RequestRetryAbortReasonTest.java | 2 +- .../router/stats/RouterHttpRequestStats.java | 232 +++++++++--------- 6 files changed, 129 insertions(+), 130 deletions(-) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java index 0ec21db216..250352341e 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReason.java @@ -1,13 +1,12 @@ package com.linkedin.venice.stats.dimensions; public enum RequestRetryAbortReason { - SLOW_ROUTE("slow_route"), DELAY_CONSTRAINT("delay_constraint"), 
MAX_RETRY_ROUTE_LIMIT("max_retry_router_limit"), - NO_AVAILABLE_REPLICA("no_available_replica"); + SLOW_ROUTE, DELAY_CONSTRAINT, MAX_RETRY_ROUTE_LIMIT, NO_AVAILABLE_REPLICA; private final String abortReason; - RequestRetryAbortReason(String abortReason) { - this.abortReason = abortReason; + RequestRetryAbortReason() { + this.abortReason = name().toLowerCase(); } public String getAbortReason() { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java index 4aa022c7a0..1ef7a36964 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestRetryType.java @@ -1,12 +1,12 @@ package com.linkedin.venice.stats.dimensions; public enum RequestRetryType { - ERROR_RETRY("error_retry"), LONG_TAIL_RETRY("long_tail_retry"); + ERROR_RETRY, LONG_TAIL_RETRY; private final String retryType; - RequestRetryType(String retryType) { - this.retryType = retryType; + RequestRetryType() { + this.retryType = name().toLowerCase(); } public String getRetryType() { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java index f549403704..84b6ce30c5 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/RequestValidationOutcome.java @@ -1,12 +1,12 @@ package com.linkedin.venice.stats.dimensions; public enum RequestValidationOutcome { - VALID("valid"), INVALID_KEY_COUNT_LIMIT_EXCEEDED("invalid_key_count_limit_exceeded"); + VALID, 
INVALID_KEY_COUNT_LIMIT_EXCEEDED; private final String outcome; - RequestValidationOutcome(String outcome) { - this.outcome = outcome; + RequestValidationOutcome() { + this.outcome = name().toLowerCase(); } public String getOutcome() { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java index 887ad509bb..761c30cfdf 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java @@ -8,12 +8,12 @@ * to account for all healthy requests. This dimensions makes it easier to make Venice specific aggregations. */ public enum VeniceResponseStatusCategory { - HEALTHY("healthy"), UNHEALTHY("unhealthy"), TARDY("tardy"), THROTTLED("throttled"), BAD_REQUEST("bad_request"); + HEALTHY, UNHEALTHY, TARDY, THROTTLED, BAD_REQUEST; private final String category; - VeniceResponseStatusCategory(String category) { - this.category = category; + VeniceResponseStatusCategory() { + this.category = name().toLowerCase(); } public String getCategory() { diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java index 84c86d0de5..f7f39d9b16 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/RequestRetryAbortReasonTest.java @@ -17,7 +17,7 @@ public void testRetryRequestAbortReason() { assertEquals(reason.getAbortReason(), "delay_constraint"); break; case MAX_RETRY_ROUTE_LIMIT: - 
assertEquals(reason.getAbortReason(), "max_retry_router_limit"); + assertEquals(reason.getAbortReason(), "max_retry_route_limit"); break; case NO_AVAILABLE_REPLICA: assertEquals(reason.getAbortReason(), "no_available_replica"); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 0d01e6623d..057ac1983c 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -23,6 +23,7 @@ import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import com.linkedin.alpini.base.misc.CollectionUtil; import com.linkedin.alpini.router.monitoring.ScatterGatherStats; import com.linkedin.venice.common.VeniceSystemStoreUtils; import com.linkedin.venice.read.RequestType; @@ -39,6 +40,7 @@ import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; import com.linkedin.venice.stats.metrics.MetricEntityState; +import com.linkedin.venice.stats.metrics.TehutiMetricNameEnum; import io.netty.handler.codec.http.HttpResponseStatus; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.common.AttributesBuilder; @@ -57,7 +59,6 @@ import io.tehuti.metrics.stats.Total; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -173,114 +174,109 @@ public RouterHttpRequestStats( INCOMING_CALL_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.INCOMING_REQUEST, 
Arrays.asList(new Count(), requestRate)); - } - }); + CollectionUtil.>mapBuilder() + .put(RouterTehutiMetricNameEnum.REQUEST, Arrays.asList(new Count(), requestRate)) + .build()); healthyRequestRateSensor = registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "healthy_request_ratio")); tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); + requestMetric = new MetricEntityState( CALL_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.HEALTHY_REQUEST, Arrays.asList(new Count(), healthyRequestRate)); - put(MetricNamesInTehuti.UNHEALTHY_REQUEST, Collections.singletonList(new Count())); - put(MetricNamesInTehuti.TARDY_REQUEST, Arrays.asList(new Count(), tardyRequestRate)); - put(MetricNamesInTehuti.THROTTLED_REQUEST, Collections.singletonList(new Count())); - put(MetricNamesInTehuti.BAD_REQUEST, Collections.singletonList(new Count())); - } - }); + CollectionUtil.>mapBuilder() + .put(RouterTehutiMetricNameEnum.HEALTHY_REQUEST, Arrays.asList(new Count(), healthyRequestRate)) + .put(RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST, Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.TARDY_REQUEST, Arrays.asList(new Count(), tardyRequestRate)) + .put(RouterTehutiMetricNameEnum.THROTTLED_REQUEST, Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.BAD_REQUEST, Collections.singletonList(new Count())) + .build()); latencyTehutiSensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0)); latencyMetric = new MetricEntityState( CALL_TIME.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put( - MetricNamesInTehuti.HEALTHY_REQUEST_LATENCY, + CollectionUtil.>mapBuilder() + .put( + RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY, Arrays.asList( new Avg(), new Max(0), TehutiUtils.getPercentileStatForNetworkLatency( 
getName(), - getFullMetricName(MetricNamesInTehuti.HEALTHY_REQUEST_LATENCY)))); - put(MetricNamesInTehuti.UNHEALTHY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))); - put(MetricNamesInTehuti.TARDY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))); - put(MetricNamesInTehuti.THROTTLED_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))); - } - }); + getFullMetricName(RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY.getMetricName())))) + .put(RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))) + .put(RouterTehutiMetricNameEnum.TARDY_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))) + .put(RouterTehutiMetricNameEnum.THROTTLED_REQUEST_LATENCY, Arrays.asList(new Avg(), new Max(0))) + .build()); retryCountMetric = new MetricEntityState( RETRY_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.ERROR_RETRY, Collections.singletonList(new Count())); - } - }); + CollectionUtil.>mapBuilder() + .put(RouterTehutiMetricNameEnum.ERROR_RETRY, Collections.singletonList(new Count())) + .build()); allowedRetryCountMetric = new MetricEntityState( ALLOWED_RETRY_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.ALLOWED_RETRY_REQUEST, Collections.singletonList(new OccurrenceRate())); - } - }); + CollectionUtil.>mapBuilder() + .put( + RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT, + Collections.singletonList(new OccurrenceRate())) + .build()); disallowedRetryCountMetric = new MetricEntityState( DISALLOWED_RETRY_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.DISALLOWED_RETRY_REQUEST, Collections.singletonList(new OccurrenceRate())); - } - }); + CollectionUtil.>mapBuilder() + .put( + RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT, + Collections.singletonList(new OccurrenceRate())) + .build()); retryDelayMetric = 
new MetricEntityState( RETRY_DELAY.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.RETRY_DELAY, Arrays.asList(new Avg(), new Max())); - } - }); + CollectionUtil.>mapBuilder() + .put(RouterTehutiMetricNameEnum.RETRY_DELAY, Arrays.asList(new Avg(), new Max())) + .build()); abortedRetryCountMetric = new MetricEntityState( ABORTED_RETRY_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); - put(MetricNamesInTehuti.SLOW_ROUTE_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); - put(MetricNamesInTehuti.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); - put(MetricNamesInTehuti.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())); - } - }); + CollectionUtil.>mapBuilder() + .put( + RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, + Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())) + .put( + RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, + Collections.singletonList(new Count())) + .put( + RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, + Collections.singletonList(new Count())) + .build()); keyCountMetric = new MetricEntityState( CALL_KEY_COUNT.getMetricEntity(), otelRepository, this::registerSensorFinal, - new HashMap>() { - { - put(MetricNamesInTehuti.KEY_NUM, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))); - put(MetricNamesInTehuti.BAD_REQUEST_KEY_COUNT, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))); - } - }); + CollectionUtil.>mapBuilder() + .put(RouterTehutiMetricNameEnum.KEY_NUM, Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))) + .put( + RouterTehutiMetricNameEnum.BAD_REQUEST_KEY_COUNT, + 
Arrays.asList(new OccurrenceRate(), new Avg(), new Max(0))) + .build()); errorRetryAttemptTriggeredByPendingRequestCheckSensor = registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); @@ -371,28 +367,31 @@ private String getDimensionName(VeniceMetricsDimensions dimension) { * types of requests also have their latencies logged at the same time. */ public void recordIncomingRequest() { - incomingRequestMetric.record(MetricNamesInTehuti.INCOMING_REQUEST, 1, commonMetricDimensions); + incomingRequestMetric.record(RouterTehutiMetricNameEnum.REQUEST, 1, commonMetricDimensions); inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet()); totalInflightRequestSensor.record(); } public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) { - String metricNameInTehuti = MetricNamesInTehuti.HEALTHY_REQUEST; + TehutiMetricNameEnum tehutiMetricNameEnum = RouterTehutiMetricNameEnum.HEALTHY_REQUEST; VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.HEALTHY; - recordRequestMetric(metricNameInTehuti, responseStatus, veniceResponseStatusCategory); + recordRequestMetric(tehutiMetricNameEnum, responseStatus, veniceResponseStatusCategory); if (latency != null) { - recordLatencyMetric(metricNameInTehuti, latency, responseStatus, veniceResponseStatusCategory); + recordLatencyMetric(tehutiMetricNameEnum, latency, responseStatus, veniceResponseStatusCategory); } } public void recordUnhealthyRequest(HttpResponseStatus responseStatus) { - recordRequestMetric(MetricNamesInTehuti.UNHEALTHY_REQUEST, responseStatus, VeniceResponseStatusCategory.UNHEALTHY); + recordRequestMetric( + RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST, + responseStatus, + VeniceResponseStatusCategory.UNHEALTHY); } public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus) { recordUnhealthyRequest(responseStatus); recordLatencyMetric( - MetricNamesInTehuti.UNHEALTHY_REQUEST_LATENCY, 
+ RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST_LATENCY, latency, responseStatus, VeniceResponseStatusCategory.UNHEALTHY); @@ -411,16 +410,16 @@ public void recordReadQuotaUsage(int quotaUsage) { } public void recordTardyRequest(double latency, HttpResponseStatus responseStatus) { - String metricNameInTehuti = MetricNamesInTehuti.TARDY_REQUEST; + TehutiMetricNameEnum tehutiMetricNameEnum = RouterTehutiMetricNameEnum.TARDY_REQUEST; VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.TARDY; - recordRequestMetric(metricNameInTehuti, responseStatus, veniceResponseStatusCategory); - recordLatencyMetric(metricNameInTehuti, latency, responseStatus, veniceResponseStatusCategory); + recordRequestMetric(tehutiMetricNameEnum, responseStatus, veniceResponseStatusCategory); + recordLatencyMetric(tehutiMetricNameEnum, latency, responseStatus, veniceResponseStatusCategory); } public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus) { recordThrottledRequest(responseStatus); recordLatencyMetric( - MetricNamesInTehuti.THROTTLED_REQUEST_LATENCY, + RouterTehutiMetricNameEnum.THROTTLED_REQUEST_LATENCY, latency, responseStatus, VeniceResponseStatusCategory.THROTTLED); @@ -434,7 +433,10 @@ public void recordThrottledRequest(double latency, HttpResponseStatus responseSt * TODO: Remove this overload after fixing the above. 
*/ public void recordThrottledRequest(HttpResponseStatus responseStatus) { - recordRequestMetric(MetricNamesInTehuti.THROTTLED_REQUEST, responseStatus, VeniceResponseStatusCategory.THROTTLED); + recordRequestMetric( + RouterTehutiMetricNameEnum.THROTTLED_REQUEST, + responseStatus, + VeniceResponseStatusCategory.THROTTLED); } public void recordErrorRetryCount() { @@ -449,10 +451,12 @@ public void recordRetryTriggeredSensorOtel(RequestRetryType retryType) { .put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) .build(); } - retryCountMetric.record(MetricNamesInTehuti.ERROR_RETRY, 1, dimensions); + retryCountMetric.record(RouterTehutiMetricNameEnum.ERROR_RETRY, 1, dimensions); } - public void recordAbortedRetrySensorOtel(String tehutiMetricName, RequestRetryAbortReason abortReason) { + public void recordAbortedRetrySensorOtel( + TehutiMetricNameEnum tehutiMetricNameEnum, + RequestRetryAbortReason abortReason) { Attributes dimensions = null; if (emitOpenTelemetryMetrics) { dimensions = Attributes.builder() @@ -460,11 +464,14 @@ public void recordAbortedRetrySensorOtel(String tehutiMetricName, RequestRetryAb .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason()) .build(); } - abortedRetryCountMetric.record(tehutiMetricName, 1, dimensions); + abortedRetryCountMetric.record(tehutiMetricNameEnum, 1, dimensions); } public void recordBadRequest(HttpResponseStatus responseStatus) { - recordRequestMetric(MetricNamesInTehuti.BAD_REQUEST, responseStatus, VeniceResponseStatusCategory.BAD_REQUEST); + recordRequestMetric( + RouterTehutiMetricNameEnum.BAD_REQUEST, + responseStatus, + VeniceResponseStatusCategory.BAD_REQUEST); } public void recordBadRequestKeyCount(int keyCount) { @@ -486,7 +493,7 @@ public void recordLatency(double latency) { } public void recordLatencyMetric( - String tehutiMetricName, + TehutiMetricNameEnum tehutiMetricNameEnum, double latency, HttpResponseStatus responseStatus, VeniceResponseStatusCategory 
veniceResponseStatusCategory) { @@ -501,11 +508,11 @@ public void recordLatencyMetric( .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) .build(); } - latencyMetric.record(tehutiMetricName, latency, dimensions); + latencyMetric.record(tehutiMetricNameEnum, latency, dimensions); } public void recordRequestMetric( - String tehutiMetricName, + TehutiMetricNameEnum tehutiMetricNameEnum, HttpResponseStatus responseStatus, VeniceResponseStatusCategory veniceResponseStatusCategory) { Attributes dimensions = null; @@ -519,7 +526,7 @@ public void recordRequestMetric( .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) .build(); } - requestMetric.record(tehutiMetricName, 1, dimensions); + requestMetric.record(tehutiMetricNameEnum, 1, dimensions); } public void recordResponseWaitingTime(double waitingTime) { @@ -562,7 +569,7 @@ public void recordKeyCountMetric(int keyNum, RequestValidationOutcome outcome) { .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome()) .build(); } - keyCountMetric.record(MetricNamesInTehuti.KEY_NUM, keyNum, dimensions); + keyCountMetric.record(RouterTehutiMetricNameEnum.KEY_NUM, keyNum, dimensions); } public void recordRequestUsage(int usage) { @@ -587,25 +594,25 @@ public void recordUnavailableRequest() { public void recordDelayConstraintAbortedRetryRequest() { recordAbortedRetrySensorOtel( - MetricNamesInTehuti.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, + RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, RequestRetryAbortReason.DELAY_CONSTRAINT); } public void recordSlowRouteAbortedRetryRequest() { recordAbortedRetrySensorOtel( - MetricNamesInTehuti.SLOW_ROUTE_ABORTED_RETRY_REQUEST, + RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST, RequestRetryAbortReason.SLOW_ROUTE); } public void recordRetryRouteLimitAbortedRetryRequest() { recordAbortedRetrySensorOtel( - 
MetricNamesInTehuti.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, + RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, RequestRetryAbortReason.MAX_RETRY_ROUTE_LIMIT); } public void recordNoAvailableReplicaAbortedRetryRequest() { recordAbortedRetrySensorOtel( - MetricNamesInTehuti.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, + RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, RequestRetryAbortReason.NO_AVAILABLE_REPLICA); } @@ -625,11 +632,12 @@ public void recordResponse() { } public void recordAllowedRetryRequest() { - allowedRetryCountMetric.record(MetricNamesInTehuti.ALLOWED_RETRY_REQUEST, 1, commonMetricDimensions); + allowedRetryCountMetric.record(RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT, 1, commonMetricDimensions); } public void recordDisallowedRetryRequest() { - disallowedRetryCountMetric.record(MetricNamesInTehuti.DISALLOWED_RETRY_REQUEST, 1, commonMetricDimensions); + disallowedRetryCountMetric + .record(RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT, 1, commonMetricDimensions); } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { @@ -637,7 +645,7 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { } public void recordRetryDelay(double delay) { - retryDelayMetric.record(MetricNamesInTehuti.RETRY_DELAY, delay, commonMetricDimensions); + retryDelayMetric.record(RouterTehutiMetricNameEnum.RETRY_DELAY, delay, commonMetricDimensions); } public void recordMetaStoreShadowRead() { @@ -666,44 +674,36 @@ static public boolean hasInFlightRequests() { /** * Metric names for tehuti metrics used in this class */ - private static class MetricNamesInTehuti { + private static enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum { /** for {@link RouterMetricEntity#INCOMING_CALL_COUNT} */ - private final static String INCOMING_REQUEST = "request"; - + REQUEST, /** for {@link RouterMetricEntity#CALL_COUNT} */ - private final static String HEALTHY_REQUEST = 
"healthy_request"; - private final static String UNHEALTHY_REQUEST = "unhealthy_request"; - private final static String TARDY_REQUEST = "tardy_request"; - private final static String THROTTLED_REQUEST = "throttled_request"; - private final static String BAD_REQUEST = "bad_request"; - + HEALTHY_REQUEST, UNHEALTHY_REQUEST, TARDY_REQUEST, THROTTLED_REQUEST, BAD_REQUEST, /** for {@link RouterMetricEntity#CALL_TIME} */ - private final static String HEALTHY_REQUEST_LATENCY = "healthy_request_latency"; - private final static String UNHEALTHY_REQUEST_LATENCY = "unhealthy_request_latency"; - private final static String TARDY_REQUEST_LATENCY = "tardy_request_latency"; - private final static String THROTTLED_REQUEST_LATENCY = "throttled_request_latency"; - + HEALTHY_REQUEST_LATENCY, UNHEALTHY_REQUEST_LATENCY, TARDY_REQUEST_LATENCY, THROTTLED_REQUEST_LATENCY, /** for {@link RouterMetricEntity#RETRY_COUNT} */ - private final static String ERROR_RETRY = "error_retry"; - + ERROR_RETRY, /** for {@link RouterMetricEntity#ALLOWED_RETRY_COUNT} */ - private final static String ALLOWED_RETRY_REQUEST = "allowed_retry_request_count"; - + ALLOWED_RETRY_REQUEST_COUNT, /** for {@link RouterMetricEntity#DISALLOWED_RETRY_COUNT} */ - private final static String DISALLOWED_RETRY_REQUEST = "disallowed_retry_request_count"; - + DISALLOWED_RETRY_REQUEST_COUNT, /** for {@link RouterMetricEntity#RETRY_DELAY} */ - private final static String RETRY_DELAY = "retry_delay"; - + RETRY_DELAY, /** for {@link RouterMetricEntity#ABORTED_RETRY_COUNT} */ - private final static String DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST = "delay_constraint_aborted_retry_request"; - private final static String SLOW_ROUTE_ABORTED_RETRY_REQUEST = "slow_route_aborted_retry_request"; - private final static String RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST = "retry_route_limit_aborted_retry_request"; - private final static String NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST = - "no_available_replica_aborted_retry_request"; - + 
DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, SLOW_ROUTE_ABORTED_RETRY_REQUEST, RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, + NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, /** for {@link RouterMetricEntity#CALL_KEY_COUNT} */ - private final static String KEY_NUM = "key_num"; - private final static String BAD_REQUEST_KEY_COUNT = "bad_request_key_count"; + KEY_NUM, BAD_REQUEST_KEY_COUNT; + + private final String metricName; + + RouterTehutiMetricNameEnum() { + this.metricName = name().toLowerCase(); + } + + @Override + public String getMetricName() { + return this.metricName; + } } } From df172b9400dc39f65b18de83cd4f2f8f71073bf6 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Mon, 25 Nov 2024 14:28:45 -0800 Subject: [PATCH 16/19] use enum for tehuti names rather than strings --- .../stats/metrics/MetricEntityState.java | 27 +++++++------- .../stats/metrics/TehutiMetricNameEnum.java | 8 +++++ .../stats/metrics/MetricEntityStateTest.java | 35 +++++++++++++------ .../router/stats/RouterHttpRequestStats.java | 2 +- 4 files changed, 48 insertions(+), 24 deletions(-) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java index 6d12e2af17..e53f1d760a 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java @@ -19,7 +19,7 @@ public class MetricEntityState { // Otel metric private Object otelMetric = null; // Map of tehuti names and sensors: 1 Otel metric can cover multiple Tehuti sensors - private Map tehutiSensors = null; + private Map tehutiSensors = null; public MetricEntityState(MetricEntity metricEntity, 
VeniceOpenTelemetryMetricsRepository otelRepository) { this.metricEntity = metricEntity; @@ -30,7 +30,7 @@ public MetricEntityState( MetricEntity metricEntity, VeniceOpenTelemetryMetricsRepository otelRepository, TehutiSensorRegistrationFunction registerTehutiSensor, - Map> tehutiMetricInput) { + Map> tehutiMetricInput) { this.metricEntity = metricEntity; createMetric(otelRepository, tehutiMetricInput, registerTehutiSensor); } @@ -42,7 +42,7 @@ public void setOtelMetric(Object otelMetric) { /** * Add Tehuti {@link Sensor} to tehutiSensors map and throw exception if sensor with same name already exists */ - public void addTehutiSensors(String name, Sensor tehutiSensor) { + public void addTehutiSensors(TehutiMetricNameEnum name, Sensor tehutiSensor) { if (tehutiSensors == null) { tehutiSensors = new HashMap<>(); } @@ -61,17 +61,18 @@ public interface TehutiSensorRegistrationFunction { public void createMetric( VeniceOpenTelemetryMetricsRepository otelRepository, - Map> tehutiMetricInput, + Map> tehutiMetricInput, TehutiSensorRegistrationFunction registerTehutiSensor) { // Otel metric: otelRepository will be null if otel is not enabled if (otelRepository != null) { setOtelMetric(otelRepository.createInstrument(this.metricEntity)); } // tehuti metric - for (Map.Entry> entry: tehutiMetricInput.entrySet()) { + for (Map.Entry> entry: tehutiMetricInput.entrySet()) { addTehutiSensors( entry.getKey(), - registerTehutiSensor.register(entry.getKey(), entry.getValue().toArray(new MeasurableStat[0]))); + registerTehutiSensor + .register(entry.getKey().getMetricName(), entry.getValue().toArray(new MeasurableStat[0]))); } } @@ -96,26 +97,26 @@ void recordOtelMetric(double value, Attributes otelDimensions) { } } - void recordTehutiMetric(String tehutiMetricName, double value) { + void recordTehutiMetric(TehutiMetricNameEnum tehutiMetricNameEnum, double value) { if (tehutiSensors != null) { - Sensor sensor = tehutiSensors.get(tehutiMetricName); + Sensor sensor = 
tehutiSensors.get(tehutiMetricNameEnum); if (sensor != null) { sensor.record(value); } } } - public void record(String tehutiMetricName, long value, Attributes otelDimensions) { + public void record(TehutiMetricNameEnum tehutiMetricNameEnum, long value, Attributes otelDimensions) { recordOtelMetric(value, otelDimensions); - recordTehutiMetric(tehutiMetricName, value); + recordTehutiMetric(tehutiMetricNameEnum, value); } - public void record(String tehutiMetricName, double value, Attributes otelDimensions) { + public void record(TehutiMetricNameEnum tehutiMetricNameEnum, double value, Attributes otelDimensions) { recordOtelMetric(value, otelDimensions); - recordTehutiMetric(tehutiMetricName, value); + recordTehutiMetric(tehutiMetricNameEnum, value); } - Map getTehutiSensors() { + Map getTehutiSensors() { return tehutiSensors; } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java new file mode 100644 index 0000000000..ab53f7253d --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java @@ -0,0 +1,8 @@ +package com.linkedin.venice.stats.metrics; + +/** + * Metric entity class to define a metric with all its properties + */ +public interface TehutiMetricNameEnum { + String getMetricName(); +} diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java index d67000af57..3e2248df96 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/metrics/MetricEntityStateTest.java @@ -25,6 +25,21 @@ public class MetricEntityStateTest { private 
MetricEntityState.TehutiSensorRegistrationFunction sensorRegistrationFunction; private Sensor mockSensor; + private enum TestTehutiMetricNameEnum implements TehutiMetricNameEnum { + TEST_METRIC; + + private final String metricName; + + TestTehutiMetricNameEnum() { + this.metricName = this.name().toLowerCase(); + } + + @Override + public String getMetricName() { + return this.metricName; + } + } + @BeforeMethod public void setUp() { mockOtelRepository = mock(VeniceOpenTelemetryMetricsRepository.class); @@ -39,7 +54,7 @@ public void testCreateMetricWithOtelEnabled() { LongCounter longCounter = mock(LongCounter.class); when(mockOtelRepository.createInstrument(mockMetricEntity)).thenReturn(longCounter); - Map> tehutiMetricInput = new HashMap<>(); + Map> tehutiMetricInput = new HashMap<>(); MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository, sensorRegistrationFunction, tehutiMetricInput); @@ -50,19 +65,19 @@ public void testCreateMetricWithOtelEnabled() { @Test public void testAddTehutiSensorsSuccessfully() { MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); - metricEntityState.addTehutiSensors("testSensor", mockSensor); + metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor); Assert.assertNotNull(metricEntityState.getTehutiSensors()); - Assert.assertTrue(metricEntityState.getTehutiSensors().containsKey("testSensor")); + Assert.assertTrue(metricEntityState.getTehutiSensors().containsKey(TestTehutiMetricNameEnum.TEST_METRIC)); } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*Sensor with name 'testSensor' already exists.*") + @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*Sensor with name 'TEST_METRIC' already exists.*") public void testAddTehutiSensorThrowsExceptionOnDuplicate() { MetricEntityState metricEntityState = new 
MetricEntityState(mockMetricEntity, mockOtelRepository); - metricEntityState.addTehutiSensors("testSensor", mockSensor); + metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor); // Adding the same sensor name again should throw an exception - metricEntityState.addTehutiSensors("testSensor", mockSensor); + metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor); } @Test @@ -96,9 +111,9 @@ public void testRecordOtelMetricCounter() { @Test public void testRecordTehutiMetric() { MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); - metricEntityState.addTehutiSensors("testSensor", mockSensor); + metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor); - metricEntityState.recordTehutiMetric("testSensor", 15.0); + metricEntityState.recordTehutiMetric(TestTehutiMetricNameEnum.TEST_METRIC, 15.0); verify(mockSensor, times(1)).record(15.0); } @@ -110,10 +125,10 @@ public void testRecordMetricsWithBothOtelAndTehuti() { MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository); metricEntityState.setOtelMetric(doubleHistogram); - metricEntityState.addTehutiSensors("testSensor", mockSensor); + metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor); Attributes attributes = Attributes.builder().put("key", "value").build(); - metricEntityState.record("testSensor", 20.0, attributes); + metricEntityState.record(TestTehutiMetricNameEnum.TEST_METRIC, 20.0, attributes); verify(doubleHistogram, times(1)).record(20.0, attributes); verify(mockSensor, times(1)).record(20.0); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index 057ac1983c..d1245b31e9 100644 --- 
a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -674,7 +674,7 @@ static public boolean hasInFlightRequests() { /** * Metric names for tehuti metrics used in this class */ - private static enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum { + private enum RouterTehutiMetricNameEnum implements TehutiMetricNameEnum { /** for {@link RouterMetricEntity#INCOMING_CALL_COUNT} */ REQUEST, /** for {@link RouterMetricEntity#CALL_COUNT} */ From 5c28a4b3169a384d24c06401b3aa4a4f8412a088 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Tue, 26 Nov 2024 01:44:19 -0800 Subject: [PATCH 17/19] Address some nit comments --- .../linkedin/venice/stats/VeniceMetricsRepository.java | 7 +++++-- .../stats/VeniceOpenTelemetryMetricsRepository.java | 3 ++- .../linkedin/venice/stats/metrics/MetricEntityState.java | 9 ++++++--- .../venice/stats/metrics/TehutiMetricNameEnum.java | 2 +- .../venice/integration/utils/VeniceRouterWrapper.java | 1 + 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java index 679360bcc1..811b3587f9 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -9,8 +9,11 @@ /** - * extends {@link MetricsRepository} to keep the changes to a minimum. - * Next step would be to create a MetricsRepository inside rather than extending it + * Repository to hold both tehuti and OpenTelemetry metrics. + * This class extends {@link MetricsRepository} to keep the changes to a minimum and + * to avoid a breaking change.
+ * Once all components are migrated to use this class: make this class add {@link MetricsRepository} + * as a member variable and delegate all tehuti calls to it. */ public class VeniceMetricsRepository extends MetricsRepository implements Closeable { private final VeniceMetricsConfig veniceMetricsConfig; diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java index 80a2e67a9d..912f8619a2 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -245,11 +245,12 @@ public CompletableResultCode shutdown() { } } - // for testing purpose + /** for testing purposes */ SdkMeterProvider getSdkMeterProvider() { return sdkMeterProvider; } + /** for testing purposes */ Meter getMeter() { return meter; } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java index e53f1d760a..2f9a823122 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/MetricEntityState.java @@ -12,13 +12,16 @@ /** - * Holds {@link MetricEntity} and 1 Otel metric and its corresponding multiple tehuti Sensors + * Operational state of a metric. It holds
+ * 1. {@link MetricEntity} + * 2. 1 Otel Instrument and + * 3. multiple tehuti Sensors for this Otel Metric */ public class MetricEntityState { private MetricEntity metricEntity; - // Otel metric + /** Otel metric */ private Object otelMetric = null; - // Map of tehuti names and sensors: 1 Otel metric can cover multiple Tehuti sensors + /** Map of tehuti names and sensors: 1 Otel metric can cover multiple Tehuti sensors */ private Map tehutiSensors = null; public MetricEntityState(MetricEntity metricEntity, VeniceOpenTelemetryMetricsRepository otelRepository) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java index ab53f7253d..27bc937b16 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/TehutiMetricNameEnum.java @@ -1,7 +1,7 @@ package com.linkedin.venice.stats.metrics; /** - * Metric entity class to define a metric with all its properties + * Interface for creating metric names enum for tehuti metrics */ public interface TehutiMetricNameEnum { String getMetricName(); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index bec8d062f5..d913d64fed 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -163,6 +163,7 @@ static StatefulServiceProvider generateService( .put(MAX_READ_CAPACITY, DEFAULT_PER_ROUTER_READ_QUOTA) .put(SYSTEM_SCHEMA_CLUSTER_NAME, 
clusterName) .put(ROUTER_STORAGE_NODE_CLIENT_TYPE, StorageNodeClientType.APACHE_HTTP_ASYNC_CLIENT.name()) + // OpenTelemetry configs .put(OTEL_VENICE_ENABLED, Boolean.TRUE.toString()) .put(OTEL_VENICE_EXPORT_TO_LOG, Boolean.TRUE.toString()) .put(OTEL_VENICE_EXPORT_TO_ENDPOINT, Boolean.TRUE.toString()) From fd88c8c4fc49d9c74db059b264be7ad89e8e6cdd Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Tue, 26 Nov 2024 11:57:07 -0800 Subject: [PATCH 18/19] Add 'metrics' in some of the otel config names --- .../venice/stats/VeniceMetricsConfig.java | 20 ++++---- .../venice/stats/VeniceMetricsConfigTest.java | 46 +++++++++---------- .../utils/VeniceRouterWrapper.java | 12 ++--- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index ef78e1171e..e954c0f379 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -27,12 +27,12 @@ public class VeniceMetricsConfig { /** * Config to enable OpenTelemetry metrics */ - public static final String OTEL_VENICE_ENABLED = "otel.venice.enabled"; + public static final String OTEL_VENICE_METRICS_ENABLED = "otel.venice.metrics.enabled"; /** * Config to set the metric prefix for OpenTelemetry metrics */ - public static final String OTEL_VENICE_METRIC_PREFIX = "otel.venice.metric.prefix"; + public static final String OTEL_VENICE_METRICS_PREFIX = "otel.venice.metrics.prefix"; /** * Config to set the naming format for OpenTelemetry metrics @@ -44,13 +44,13 @@ public class VeniceMetricsConfig { * Export opentelemetry metrics to a log exporter * {@link VeniceOpenTelemetryMetricsRepository.LogBasedMetricExporter} */ - public static final String OTEL_VENICE_EXPORT_TO_LOG = 
"otel.venice.export.to.log"; + public static final String OTEL_VENICE_METRICS_EXPORT_TO_LOG = "otel.venice.metrics.export.to.log"; /** * Export opentelemetry metrics to {@link #OTEL_EXPORTER_OTLP_METRICS_ENDPOINT} * over {@link #OTEL_EXPORTER_OTLP_METRICS_PROTOCOL} */ - public static final String OTEL_VENICE_EXPORT_TO_ENDPOINT = "otel.venice.export.to.endpoint"; + public static final String OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT = "otel.venice.metrics.export.to.endpoint"; /** * Config Map to add custom dimensions to the metrics: Can be used for system dimensions @@ -62,7 +62,7 @@ public class VeniceMetricsConfig { * Multiple headers are separated by ',' * For example: "custom_dimension_one=value1,custom_dimension_two=value2,custom_dimension_three=value3" */ - public static final String OTEL_VENICE_CUSTOM_DIMENSIONS_MAP = "otel.venice.custom.dimensions.map"; + public static final String OTEL_VENICE_METRICS_CUSTOM_DIMENSIONS_MAP = "otel.venice.metrics.custom.dimensions.map"; /** * Protocol over which the metrics are exported to {@link #OTEL_EXPORTER_OTLP_METRICS_ENDPOINT}
@@ -262,19 +262,19 @@ public Builder setOtelExponentialHistogramMaxBuckets(int otelExponentialHistogra */ public Builder extractAndSetOtelConfigs(Map configs) { String configValue; - if ((configValue = configs.get(OTEL_VENICE_ENABLED)) != null) { + if ((configValue = configs.get(OTEL_VENICE_METRICS_ENABLED)) != null) { setEmitOtelMetrics(Boolean.parseBoolean(configValue)); } - if ((configValue = configs.get(OTEL_VENICE_METRIC_PREFIX)) != null) { + if ((configValue = configs.get(OTEL_VENICE_METRICS_PREFIX)) != null) { setMetricPrefix(configValue); } - if ((configValue = configs.get(OTEL_VENICE_EXPORT_TO_LOG)) != null) { + if ((configValue = configs.get(OTEL_VENICE_METRICS_EXPORT_TO_LOG)) != null) { setExportOtelMetricsToLog(Boolean.parseBoolean(configValue)); } - if ((configValue = configs.get(OTEL_VENICE_EXPORT_TO_ENDPOINT)) != null) { + if ((configValue = configs.get(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT)) != null) { setExportOtelMetricsToEndpoint(Boolean.parseBoolean(configValue)); } @@ -282,7 +282,7 @@ public Builder extractAndSetOtelConfigs(Map configs) { * custom dimensions are passed as key=value pairs separated by '='
* Multiple dimensions are separated by ',' */ - if ((configValue = configs.get(OTEL_VENICE_CUSTOM_DIMENSIONS_MAP)) != null) { + if ((configValue = configs.get(OTEL_VENICE_METRICS_CUSTOM_DIMENSIONS_MAP)) != null) { String[] dimensions = configValue.split(","); for (String dimension: dimensions) { String[] keyValue = dimension.split("="); diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java index 4cfb4f6da9..e7f04ff205 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -6,10 +6,10 @@ import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_CUSTOM_DIMENSIONS_MAP; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_ENABLED; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_ENDPOINT; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_LOG; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_CUSTOM_DIMENSIONS_MAP; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_ENABLED; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_EXPORT_TO_LOG; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_NAMING_FORMAT; import static 
org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -53,8 +53,8 @@ public void testDefaultValuesWithBasicConfig() { @Test public void testCustomValues() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_LOG, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_LOG, "true"); MetricConfig metricConfig = new MetricConfig(); @@ -74,8 +74,8 @@ public void testCustomValues() { @Test(expectedExceptions = IllegalArgumentException.class) public void testOtelMissingConfigs() { Map invalidOtelConfigs = new HashMap<>(); - invalidOtelConfigs.put(OTEL_VENICE_ENABLED, "true"); - invalidOtelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + invalidOtelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + invalidOtelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "true"); new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") @@ -94,7 +94,7 @@ public void testOtelConfigWithInvalidMetricFormat() { @Test public void testOtelConfigWithValidMetricFormat() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); otelConfigs.put(OTEL_VENICE_METRICS_NAMING_FORMAT, "CAMEL_CASE"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") @@ -108,8 +108,8 @@ public void testOtelConfigWithValidMetricFormat() { @Test public void testEnableHttpGrpcEndpointConfigWithRequiredFields() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "true"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); @@ 
-126,8 +126,8 @@ public void testEnableHttpGrpcEndpointConfigWithRequiredFields() { @Test public void testSetAggregationTemporalitySelector() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "true"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE, "delta"); @@ -142,8 +142,8 @@ public void testSetAggregationTemporalitySelector() { @Test(expectedExceptions = IllegalArgumentException.class) public void testSetAggregationTemporalitySelectorInvalidConfig() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "true"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE, "invalid"); @@ -158,8 +158,8 @@ public void testSetAggregationTemporalitySelectorInvalidConfig() { @Test(expectedExceptions = IllegalArgumentException.class) public void testSetHistogramAggregationSelectorInvalidConfig() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "true"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, OtlpConfigUtil.PROTOCOL_HTTP_PROTOBUF); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, 
"http://localhost"); otelConfigs.put(OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION, "invalid"); @@ -175,9 +175,9 @@ public void testSetHistogramAggregationSelectorInvalidConfig() { @Test public void testSetOtelCustomDimensionsMap() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "false"); - otelConfigs.put(OTEL_VENICE_CUSTOM_DIMENSIONS_MAP, "key1=value1,key2=value2"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "false"); + otelConfigs.put(OTEL_VENICE_METRICS_CUSTOM_DIMENSIONS_MAP, "key1=value1,key2=value2"); VeniceMetricsConfig config = new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") .extractAndSetOtelConfigs(otelConfigs) @@ -190,9 +190,9 @@ public void testSetOtelCustomDimensionsMap() { @Test(expectedExceptions = IllegalArgumentException.class) public void testSetOtelCustomDimensionsMapWithInvalidValue() { Map otelConfigs = new HashMap<>(); - otelConfigs.put(OTEL_VENICE_ENABLED, "true"); - otelConfigs.put(OTEL_VENICE_EXPORT_TO_ENDPOINT, "false"); - otelConfigs.put(OTEL_VENICE_CUSTOM_DIMENSIONS_MAP, "key1=value1,key2=value2=3"); + otelConfigs.put(OTEL_VENICE_METRICS_ENABLED, "true"); + otelConfigs.put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, "false"); + otelConfigs.put(OTEL_VENICE_METRICS_CUSTOM_DIMENSIONS_MAP, "key1=value1,key2=value2=3"); new Builder().setServiceName("TestService") .setMetricPrefix("TestPrefix") .extractAndSetOtelConfigs(otelConfigs) diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index d913d64fed..002ac5dc24 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ 
b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -29,9 +29,9 @@ import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL; import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_ENABLED; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_ENDPOINT; -import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_EXPORT_TO_LOG; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_ENABLED; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT; +import static com.linkedin.venice.stats.VeniceMetricsConfig.OTEL_VENICE_METRICS_EXPORT_TO_LOG; import com.linkedin.venice.client.store.ClientConfig; import com.linkedin.venice.helix.HelixBaseRoutingRepository; @@ -164,9 +164,9 @@ static StatefulServiceProvider generateService( .put(SYSTEM_SCHEMA_CLUSTER_NAME, clusterName) .put(ROUTER_STORAGE_NODE_CLIENT_TYPE, StorageNodeClientType.APACHE_HTTP_ASYNC_CLIENT.name()) // OpenTelemetry configs - .put(OTEL_VENICE_ENABLED, Boolean.TRUE.toString()) - .put(OTEL_VENICE_EXPORT_TO_LOG, Boolean.TRUE.toString()) - .put(OTEL_VENICE_EXPORT_TO_ENDPOINT, Boolean.TRUE.toString()) + .put(OTEL_VENICE_METRICS_ENABLED, Boolean.TRUE.toString()) + .put(OTEL_VENICE_METRICS_EXPORT_TO_LOG, Boolean.TRUE.toString()) + .put(OTEL_VENICE_METRICS_EXPORT_TO_ENDPOINT, Boolean.TRUE.toString()) .put(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, "http/protobuf") .put(OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, "http://localhost:4318/v1/metrics") .put(OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE, "delta") From e7683b5d9996d7b313fc09f4228e728b2c765cb8 Mon Sep 17 00:00:00 2001 From: Manoj 
Nagarajan Date: Tue, 26 Nov 2024 13:24:24 -0800 Subject: [PATCH 19/19] address review comments --- .../venice/stats/VeniceMetricsConfig.java | 4 ++- .../HttpResponseStatusCodeCategory.java | 6 ++++ .../router/stats/RouterHttpRequestStats.java | 32 +++++++------------ 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java index e954c0f379..0a2a1da619 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -1,5 +1,7 @@ package com.linkedin.venice.stats; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; + import com.linkedin.venice.stats.metrics.MetricEntity; import io.opentelemetry.exporter.otlp.internal.OtlpConfigUtil; import io.opentelemetry.sdk.metrics.export.AggregationTemporalitySelector; @@ -183,7 +185,7 @@ public static class Builder { private String otelEndpoint = null; Map otelHeaders = new HashMap<>(); private boolean exportOtelMetricsToLog = false; - private VeniceOpenTelemetryMetricNamingFormat metricNamingFormat = VeniceOpenTelemetryMetricNamingFormat.SNAKE_CASE; + private VeniceOpenTelemetryMetricNamingFormat metricNamingFormat = SNAKE_CASE; private AggregationTemporalitySelector otelAggregationTemporalitySelector = AggregationTemporalitySelector.deltaPreferred(); private boolean useOtelExponentialHistogram = true; diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java index 4f24644ec7..c357326370 100644 --- 
a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/HttpResponseStatusCodeCategory.java @@ -11,6 +11,12 @@ public class HttpResponseStatusCodeCategory { private static final String UNKNOWN_CATEGORY = "unknown"; + /** + * Private constructor to prevent instantiation of this Utility class + */ + private HttpResponseStatusCodeCategory() { + } + public static String getVeniceHttpResponseStatusCodeCategory(HttpResponseStatus statusCode) { if (statusCode == null) { return UNKNOWN_CATEGORY; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index d1245b31e9..93fef88a5b 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -22,6 +22,7 @@ import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static java.util.Collections.singletonList; import com.linkedin.alpini.base.misc.CollectionUtil; import com.linkedin.alpini.router.monitoring.ScatterGatherStats; @@ -58,7 +59,6 @@ import io.tehuti.metrics.stats.Rate; import io.tehuti.metrics.stats.Total; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -189,10 +189,10 @@ public RouterHttpRequestStats( this::registerSensorFinal, CollectionUtil.>mapBuilder() .put(RouterTehutiMetricNameEnum.HEALTHY_REQUEST, 
Arrays.asList(new Count(), healthyRequestRate)) - .put(RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST, Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.UNHEALTHY_REQUEST, singletonList(new Count())) .put(RouterTehutiMetricNameEnum.TARDY_REQUEST, Arrays.asList(new Count(), tardyRequestRate)) - .put(RouterTehutiMetricNameEnum.THROTTLED_REQUEST, Collections.singletonList(new Count())) - .put(RouterTehutiMetricNameEnum.BAD_REQUEST, Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.THROTTLED_REQUEST, singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.BAD_REQUEST, singletonList(new Count())) .build()); latencyTehutiSensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0)); @@ -219,7 +219,7 @@ public RouterHttpRequestStats( otelRepository, this::registerSensorFinal, CollectionUtil.>mapBuilder() - .put(RouterTehutiMetricNameEnum.ERROR_RETRY, Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.ERROR_RETRY, singletonList(new Count())) .build()); allowedRetryCountMetric = new MetricEntityState( @@ -227,9 +227,7 @@ public RouterHttpRequestStats( otelRepository, this::registerSensorFinal, CollectionUtil.>mapBuilder() - .put( - RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT, - Collections.singletonList(new OccurrenceRate())) + .put(RouterTehutiMetricNameEnum.ALLOWED_RETRY_REQUEST_COUNT, singletonList(new OccurrenceRate())) .build()); disallowedRetryCountMetric = new MetricEntityState( @@ -237,9 +235,7 @@ public RouterHttpRequestStats( otelRepository, this::registerSensorFinal, CollectionUtil.>mapBuilder() - .put( - RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT, - Collections.singletonList(new OccurrenceRate())) + .put(RouterTehutiMetricNameEnum.DISALLOWED_RETRY_REQUEST_COUNT, singletonList(new OccurrenceRate())) .build()); retryDelayMetric = new MetricEntityState( @@ -255,16 +251,10 @@ public RouterHttpRequestStats( otelRepository, this::registerSensorFinal, 
CollectionUtil.>mapBuilder() - .put( - RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, - Collections.singletonList(new Count())) - .put(RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST, Collections.singletonList(new Count())) - .put( - RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, - Collections.singletonList(new Count())) - .put( - RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, - Collections.singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.DELAY_CONSTRAINT_ABORTED_RETRY_REQUEST, singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.SLOW_ROUTE_ABORTED_RETRY_REQUEST, singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.RETRY_ROUTE_LIMIT_ABORTED_RETRY_REQUEST, singletonList(new Count())) + .put(RouterTehutiMetricNameEnum.NO_AVAILABLE_REPLICA_ABORTED_RETRY_REQUEST, singletonList(new Count())) .build()); keyCountMetric = new MetricEntityState(