Skip to content

Commit

Permalink
feat(telemetry): Add support for OTel metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Caleb Schoepp <caleb.schoepp@fermyon.com>
  • Loading branch information
calebschoepp committed Apr 29, 2024
1 parent 33afee8 commit 06af58e
Show file tree
Hide file tree
Showing 10 changed files with 241 additions and 66 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/telemetry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ tracing-appender = "0.2.2"
tracing-opentelemetry = "0.23.0"
tracing-subscriber = { version = "0.3.17", features = ["env-filter", "json", "registry"] }
url = "2.2.2"
terminal = { path = "../terminal" }
72 changes: 60 additions & 12 deletions crates/telemetry/src/env.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,40 @@
/// Returns a boolean indicating if the OTEL layer should be enabled.
use std::env::VarError;

use opentelemetry_otlp::{
OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, OTEL_EXPORTER_OTLP_PROTOCOL,
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
};

/// Reports whether the OTel tracing layer should be built.
///
/// Tracing counts as enabled when at least one of these environment variables is set to a
/// non-empty value:
/// - `OTEL_EXPORTER_OTLP_ENDPOINT`
/// - `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`
///
/// Setting OTEL_SDK_DISABLED to a non-empty value overrides the above and turns tracing off.
pub(crate) fn otel_tracing_enabled() -> bool {
    let endpoint_configured = otel_enabled(&[
        OTEL_EXPORTER_OTLP_ENDPOINT,
        OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
    ]);
    if !endpoint_configured {
        return false;
    }
    // An endpoint is configured; the SDK-wide kill switch still has the final say.
    !otel_sdk_disabled()
}

/// Returns a boolean indicating if the OTEL metrics layer should be enabled.
///
/// It is considered enabled if any of the following environment variables are set and not empty:
/// - `OTEL_EXPORTER_OTLP_ENDPOINT`
/// - `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`
///
/// Note that this is overridden if OTEL_SDK_DISABLED is set and not empty.
pub(crate) fn otel_enabled() -> bool {
const ENABLING_VARS: &[&str] = &[
"OTEL_EXPORTER_OTLP_ENDPOINT",
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT",
"OTEL_EXPORTER_OTLP_METRICS_ENDPOINT",
];
ENABLING_VARS
pub(crate) fn otel_metrics_enabled() -> bool {
otel_enabled(&[
OTEL_EXPORTER_OTLP_ENDPOINT,
OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
]) && !otel_sdk_disabled()
}

/// Returns `true` as soon as one of `enabling_vars` names an environment variable that is set
/// to a non-empty value; returns `false` if none is (including when the slice is empty).
fn otel_enabled(enabling_vars: &[&str]) -> bool {
    for name in enabling_vars {
        match std::env::var_os(name) {
            // Set and non-empty: this variable enables the feature.
            Some(value) if !value.is_empty() => return true,
            // Unset or empty: keep looking.
            _ => continue,
        }
    }
    false
}
Expand All @@ -34,15 +56,41 @@ pub(crate) enum OtlpProtocol {
impl OtlpProtocol {
    /// Returns the protocol to be used for exporting traces as defined by the environment.
    ///
    /// `OTEL_EXPORTER_OTLP_TRACES_PROTOCOL` takes precedence over the general
    /// `OTEL_EXPORTER_OTLP_PROTOCOL`.
    pub(crate) fn traces_protocol_from_env() -> Self {
        Self::protocol_from_env(
            std::env::var("OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"),
            std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL),
        )
    }

    /// Returns the protocol to be used for exporting metrics as defined by the environment.
    ///
    /// `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` takes precedence over the general
    /// `OTEL_EXPORTER_OTLP_PROTOCOL`.
    pub(crate) fn metrics_protocol_from_env() -> Self {
        Self::protocol_from_env(
            std::env::var("OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"),
            std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL),
        )
    }

    /// Resolves the protocol from a signal-specific setting and the general setting.
    ///
    /// The signal-specific value wins when it is usable; otherwise the general value is used;
    /// otherwise the default is `http/protobuf`. A value that could not be read (`Err`) or that
    /// is empty is treated as unset, matching how the endpoint variables are handled in this
    /// module. An unrecognized value falls back to `http/protobuf` and emits a warning, at most
    /// once per process.
    fn protocol_from_env(
        specific_protocol: Result<String, VarError>,
        general_protocol: Result<String, VarError>,
    ) -> Self {
        static WARN_ONCE: std::sync::Once = std::sync::Once::new();

        // Empty values count as "not set" so an empty specific variable does not mask a valid
        // general one.
        let protocol = specific_protocol
            .ok()
            .filter(|value| !value.is_empty())
            .or_else(|| general_protocol.ok().filter(|value| !value.is_empty()));

        match protocol.as_deref() {
            None | Some("http/protobuf") => Self::HttpProtobuf,
            Some("grpc") => Self::Grpc,
            Some("http/json") => Self::HttpJson,
            // This must be the only catch-all arm: an earlier `_` arm would make the warning
            // unreachable.
            Some(s) => {
                WARN_ONCE.call_once(|| {
                    terminal::warn!(
                        "'{s}' is not a valid OTLP protocol, defaulting to http/protobuf"
                    );
                });
                Self::HttpProtobuf
            }
        }
    }
}
53 changes: 42 additions & 11 deletions crates/telemetry/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::io::IsTerminal;

use env::otel_enabled;
use env::otel_sdk_disabled;
use env::otel_metrics_enabled;
use env::otel_tracing_enabled;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use tracing_subscriber::{fmt, prelude::*, registry, EnvFilter, Layer};

pub mod detector;
mod env;
pub mod metrics;
mod propagation;
mod traces;

Expand All @@ -16,9 +17,31 @@ pub use propagation::inject_trace_context;
/// Initializes telemetry for Spin using the [tracing] library.
///
/// Under the hood this involves initializing a [tracing::Subscriber] with multiple [Layer]s. One
/// [Layer] emits [tracing] events to stderr, and another sends spans to an OTEL collector.
/// [Layer] emits [tracing] events to stderr, another sends spans to an OTel collector, and another
/// sends metrics to an OTel collector.
///
/// Configuration is pulled from the environment.
/// Configuration for the OTel layers is pulled from the environment.
///
/// Examples of emitting traces from Spin:
///
/// ```no_run
/// # use tracing::instrument;
/// #[instrument(name = "span_name", err(level = Level::INFO), fields(otel.name = "dynamically set name"))]
/// fn func_you_want_to_trace() {}
/// ```
///
/// Some notes on tracing:
///
/// - If you don't want the span to be collected by default, emit it at a trace or debug level.
/// - Make sure you `.in_current_span()` any spawned tasks so the span context is propagated.
/// - Use the otel.name attribute to dynamically set the span name.
/// - Use the err argument to have instrument automatically handle errors.
///
/// Examples of emitting metrics from Spin:
///
/// ```no_run
/// spin_telemetry::metrics::monotonic_counter!(spin.metric_name = 1, metric_attribute = "value");
/// ```
pub fn init(spin_version: String) -> anyhow::Result<ShutdownGuard> {
// This layer will print all tracing library log messages to stderr.
let fmt_layer = fmt::layer()
Expand All @@ -30,19 +53,27 @@ pub fn init(spin_version: String) -> anyhow::Result<ShutdownGuard> {
.add_directive("watchexec=off".parse()?),
);

// We only want to build the otel layer if the user passed some endpoint configuration and it wasn't explicitly disabled.
let build_otel_layer = !otel_sdk_disabled() && otel_enabled();
let otel_layer = if build_otel_layer {
// In this case we want to set the error handler to log errors to the tracing layer.
opentelemetry::global::set_error_handler(otel_error_handler)?;
// Even if metrics or tracing aren't enabled we're okay to turn on the global error handler
opentelemetry::global::set_error_handler(otel_error_handler)?;

let otel_tracing_layer = if otel_tracing_enabled() {
Some(traces::otel_tracing_layer(spin_version.clone())?)
} else {
None
};

Some(traces::otel_tracing_layer(spin_version)?)
let otel_metrics_layer = if otel_metrics_enabled() {
Some(metrics::otel_metrics_layer(spin_version)?)
} else {
None
};

// Build a registry subscriber with the layers we want to use.
registry().with(otel_layer).with(fmt_layer).init();
registry()
.with(otel_tracing_layer)
.with(otel_metrics_layer)
.with(fmt_layer)
.init();

// Used to propagate trace information in the standard W3C TraceContext format. Even if the otel
// layer is disabled we still want to propagate trace context.
Expand Down
118 changes: 118 additions & 0 deletions crates/telemetry/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use std::time::Duration;

use anyhow::{bail, Result};
use opentelemetry_otlp::MetricsExporterBuilder;
use opentelemetry_sdk::{
metrics::{
reader::{DefaultAggregationSelector, DefaultTemporalitySelector},
PeriodicReader, SdkMeterProvider,
},
resource::{EnvResourceDetector, TelemetryResourceDetector},
runtime, Resource,
};
use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer};
use tracing_subscriber::{filter::Filtered, layer::Layered, EnvFilter, Registry};

use crate::{detector::SpinResourceDetector, env::OtlpProtocol};

/// Builds the tracing-subscriber layer that forwards metrics to an OTEL collector.
///
/// OTEL configuration is taken from the environment, using the variables described
/// [here](https://opentelemetry.io/docs/specs/otel/protocol/exporter/) and
/// [here](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration).
///
/// Returns an error if the selected protocol is `http/json` (unsupported) or if the metrics
/// exporter cannot be built.
pub(crate) fn otel_metrics_layer(spin_version: String) -> Result<CustomMetricsLayer> {
    let detection_timeout = Duration::from_secs(5);
    let resource = Resource::from_detectors(
        detection_timeout,
        vec![
            // Set service.name from env OTEL_SERVICE_NAME > env OTEL_RESOURCE_ATTRIBUTES > spin
            // Set service.version from Spin metadata
            Box::new(SpinResourceDetector::new(spin_version)),
            // Sets fields from env OTEL_RESOURCE_ATTRIBUTES
            Box::new(EnvResourceDetector::new()),
            // Sets telemetry.sdk{name, language, version}
            Box::new(TelemetryResourceDetector),
        ],
    );

    // The exporter configures itself from the OTEL_EXPORTER_* environment variables. The
    // transport is chosen by `OtlpProtocol::metrics_protocol_from_env`: gRPC (tonic) or
    // HTTP/protobuf; http/json is rejected because it is not supported.
    let protocol = OtlpProtocol::metrics_protocol_from_env();
    let exporter_builder: MetricsExporterBuilder = match protocol {
        OtlpProtocol::HttpJson => bail!("http/json OTLP protocol is not supported"),
        OtlpProtocol::HttpProtobuf => opentelemetry_otlp::new_exporter().http().into(),
        OtlpProtocol::Grpc => opentelemetry_otlp::new_exporter().tonic().into(),
    };
    let exporter = exporter_builder.build_metrics_exporter(
        Box::new(DefaultTemporalitySelector::new()),
        Box::new(DefaultAggregationSelector::new()),
    )?;

    // Hand the exporter to a periodic reader (given the Tokio runtime) and attach both the
    // reader and the detected resource to the meter provider backing the layer.
    let meter_provider = SdkMeterProvider::builder()
        .with_reader(PeriodicReader::builder(exporter, runtime::Tokio).build())
        .with_resource(resource)
        .build();

    Ok(MetricsLayer::new(meter_provider))
}

/// Records an increment to the named counter with the given attributes.
///
/// The metric name is a dot-separated identifier path; it is emitted as a `tracing` trace-level
/// event field prefixed with `counter.`. Any `attribute = value` pairs are forwarded as further
/// fields. A trailing comma is accepted.
///
/// The increment may only be an i64 or f64. You must not mix types for the same metric.
///
/// ```no_run
/// counter!(spin.metric_name = 1, metric_attribute = "value");
/// ```
#[macro_export]
macro_rules! counter {
    ($metric:ident $(. $suffixes:ident)* = $metric_value:expr $(, $attrs:ident = $values:expr)* $(,)?) => {
        tracing::trace!(counter.$metric $(. $suffixes)* = $metric_value $(, $attrs = $values)*);
    }
}

/// Adds an additional value to the distribution of the named histogram with the given attributes.
///
/// The metric name is a dot-separated identifier path; it is emitted as a `tracing` trace-level
/// event field prefixed with `histogram.`. Any `attribute = value` pairs are forwarded as
/// further fields. A trailing comma is accepted.
///
/// The recorded value may only be an i64 or f64. You must not mix types for the same metric.
///
/// ```no_run
/// histogram!(spin.metric_name = 1.5, metric_attribute = "value");
/// ```
#[macro_export]
macro_rules! histogram {
    ($metric:ident $(. $suffixes:ident)* = $metric_value:expr $(, $attrs:ident = $values:expr)* $(,)?) => {
        tracing::trace!(histogram.$metric $(. $suffixes)* = $metric_value $(, $attrs = $values)*);
    }
}

/// Records an increment to the named monotonic counter with the given attributes.
///
/// The metric name is a dot-separated identifier path; it is emitted as a `tracing` trace-level
/// event field prefixed with `monotonic_counter.`. Any `attribute = value` pairs are forwarded
/// as further fields. A trailing comma is accepted.
///
/// The increment may only be a positive i64 or f64. You must not mix types for the same metric.
///
/// ```no_run
/// monotonic_counter!(spin.metric_name = 1, metric_attribute = "value");
/// ```
#[macro_export]
macro_rules! monotonic_counter {
    ($metric:ident $(. $suffixes:ident)* = $metric_value:expr $(, $attrs:ident = $values:expr)* $(,)?) => {
        tracing::trace!(monotonic_counter.$metric $(. $suffixes)* = $metric_value $(, $attrs = $values)*);
    }
}

pub use counter;
pub use histogram;
pub use monotonic_counter;

/// This really large type alias is required to make the registry.with() pattern happy.
///
/// The type parameter of `MetricsLayer` is the subscriber the layer is stacked on top of. As
/// spelled out here, that is a `Registry` layered with the optional, `EnvFilter`-filtered OTel
/// tracing layer.
// NOTE(review): this presumably has to mirror the order in which `init` adds layers to the
// registry (tracing layer first, then this metrics layer) — confirm when changing either.
type CustomMetricsLayer = MetricsLayer<
Layered<
Option<
Filtered<
OpenTelemetryLayer<Registry, opentelemetry_sdk::trace::Tracer>,
EnvFilter,
Registry,
>,
>,
Registry,
>,
>;
45 changes: 2 additions & 43 deletions crates/telemetry/src/traces.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use std::time::Duration;

use anyhow::bail;
use opentelemetry_otlp::{SpanExporterBuilder, WithExportConfig};
use opentelemetry_otlp::{OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_TRACES_ENDPOINT};
use opentelemetry_otlp::SpanExporterBuilder;
use opentelemetry_sdk::{
resource::{EnvResourceDetector, TelemetryResourceDetector},
trace::Tracer,
Expand Down Expand Up @@ -41,18 +40,11 @@ pub(crate) fn otel_tracing_layer(
// currently default to using the HTTP exporter but in the future we could select off of the
// combination of OTEL_EXPORTER_OTLP_PROTOCOL and OTEL_EXPORTER_OTLP_TRACES_PROTOCOL to
// determine whether we should use http/protobuf or grpc.
let mut exporter: SpanExporterBuilder = match OtlpProtocol::traces_protocol_from_env() {
let exporter: SpanExporterBuilder = match OtlpProtocol::traces_protocol_from_env() {
OtlpProtocol::Grpc => opentelemetry_otlp::new_exporter().tonic().into(),
OtlpProtocol::HttpProtobuf => opentelemetry_otlp::new_exporter().http().into(),
OtlpProtocol::HttpJson => bail!("http/json OTLP protocol is not supported"),
};
if let Some(endpoint) = fix_endpoint_bug() {
match exporter {
SpanExporterBuilder::Tonic(inner) => exporter = inner.with_endpoint(endpoint).into(),
SpanExporterBuilder::Http(inner) => exporter = inner.with_endpoint(endpoint).into(),
_ => {}
}
}

let tracer = opentelemetry_otlp::new_pipeline()
.tracing()
Expand All @@ -71,36 +63,3 @@ pub(crate) fn otel_tracing_layer(
.with_threads(false)
.with_filter(env_filter))
}

// Mitigation borrowed from https://github.com/neondatabase/neon/blob/main/libs/tracing-utils/src/lib.rs
//
// opentelemetry-otlp v0.15.0 mishandles the OTEL_EXPORTER_OTLP_ENDPOINT env variable. Per the
// OpenTelemetry spec at
// <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#endpoint-urls-for-otlphttp>,
// the full exporter URL is the value of OTEL_EXPORTER_OTLP_ENDPOINT with "/v1/traces" appended.
// opentelemetry-otlp only does that for the grpc-tonic exporter; other exporters, such as the
// HTTP exporter, use the OTEL_EXPORTER_OTLP_ENDPOINT URL verbatim, without the "/v1/traces"
// suffix.
//
// See https://github.com/open-telemetry/opentelemetry-rust/pull/950
//
// The workaround: read OTEL_EXPORTER_OTLP_ENDPOINT ourselves and build the endpoint URL with the
// "/v1/traces" path. Once the bug is fixed upstream this code can be removed; leaving it in
// place is harmless, because the crate ignores OTEL_EXPORTER_OTLP_ENDPOINT whenever the endpoint
// is set explicitly via `with_endpoint`.
//
// Returns `None` when OTEL_EXPORTER_OTLP_TRACES_ENDPOINT can be read (the traces-specific
// setting is left alone) or when OTEL_EXPORTER_OTLP_ENDPOINT cannot be read.
fn fix_endpoint_bug() -> Option<String> {
    if std::env::var(OTEL_EXPORTER_OTLP_TRACES_ENDPOINT).is_ok() {
        return None;
    }

    let base = std::env::var(OTEL_EXPORTER_OTLP_ENDPOINT).ok()?;
    // Join with exactly one slash between the base URL and the traces path.
    let separator = if base.ends_with('/') { "" } else { "/" };
    Some(format!("{base}{separator}v1/traces"))
}
Loading

0 comments on commit 06af58e

Please sign in to comment.