Skip to content

Commit

Permalink
feat(vlog): New vlog interface + opentelemtry improvements (#2472)
Browse files Browse the repository at this point in the history
OK, that's a big PR for its purpose, sorry for that.

Essence of changes:

## Nice observability installation from the config

We can now write the following:
```
    let observability_config = configs
        .observability
        .clone()
        .context("observability config")?;
    let _observability_guard = observability_config.install()?;
```

instead of having to manually map data from `observability_config` to
`ObservabilityBuilder` methods.
This is done in a non-intrusive way: the functionality is under a
feature flag in `zksync_config` which is only enabled for binaries.

## Nicer `ObservabilityBuilder` interface

Previously we've had "everything in one place", now we have separate
structures for logs, opentelemetry, and sentry. It would be nice to
integrate prometheus there too, but it's a separate task.

## More service information is exposed via opentelemetry

Previously only service name was exposed. Now we also expose pod name
and namespace.
It's done in a non-intrusive way as well, where you can provide _some_
of information if you have it, but otherwise it'll be populated from env
variables (which is fine, because it's not a permanent configuration,
but rather a machine descriptor).
  • Loading branch information
popzxc authored Jul 26, 2024
1 parent e467013 commit c0815cd
Show file tree
Hide file tree
Showing 49 changed files with 1,204 additions and 1,510 deletions.
360 changes: 195 additions & 165 deletions Cargo.lock

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,10 @@ num = "0.4.0"
num_cpus = "1.13"
num_enum = "0.7.2"
once_cell = "1"
opentelemetry = "0.20.0"
opentelemetry-otlp = "0.13.0"
opentelemetry-semantic-conventions = "0.12.0"
opentelemetry = "0.24.0"
opentelemetry_sdk = "0.24.0"
opentelemetry-otlp = "0.17.0"
opentelemetry-semantic-conventions = "0.16.0"
pin-project-lite = "0.2.13"
pretty_assertions = "1"
prost = "0.12.1"
Expand Down Expand Up @@ -179,7 +180,8 @@ tower = "0.4.13"
tower-http = "0.5.2"
tracing = "0.1"
tracing-subscriber = "0.3"
tracing-opentelemetry = "0.21.0"
tracing-opentelemetry = "0.25.0"
time = "0.3.36" # Has to be same as used by `tracing-subscriber`
url = "2"
web3 = "0.19.0"
fraction = "0.15.3"
Expand Down
2 changes: 1 addition & 1 deletion core/bin/block_reverter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ categories.workspace = true
publish = false

[dependencies]
zksync_config.workspace = true
zksync_config = { workspace = true, features = ["observability_ext"] }
zksync_core_leftovers.workspace = true
zksync_env_config.workspace = true
zksync_dal.workspace = true
Expand Down
26 changes: 12 additions & 14 deletions core/bin/block_reverter/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,21 +106,19 @@ async fn main() -> anyhow::Result<()> {
let opts = Cli::parse();
let observability_config =
ObservabilityConfig::from_env().context("ObservabilityConfig::from_env()")?;
let log_format: zksync_vlog::LogFormat = observability_config
.log_format
.parse()
.context("Invalid log format")?;

let mut builder = zksync_vlog::ObservabilityBuilder::new()
.with_log_format(log_format)
.disable_default_logs(); // It's a CLI application, so we only need to show logs that were actually requested.
if let Some(sentry_url) = observability_config.sentry_url {
builder = builder
.with_sentry_url(&sentry_url)
.context("Invalid Sentry URL")?
.with_sentry_environment(observability_config.sentry_environment);
}
let _guard = builder.build();
let logs = zksync_vlog::Logs::try_from(observability_config.clone())
.context("logs")?
.disable_default_logs(); // It's a CLI application, so we only need to show logs that were actually requested.;
let sentry: Option<zksync_vlog::Sentry> =
TryFrom::try_from(observability_config.clone()).context("sentry")?;
let opentelemetry: Option<zksync_vlog::OpenTelemetry> =
TryFrom::try_from(observability_config.clone()).context("opentelemetry")?;
let _guard = zksync_vlog::ObservabilityBuilder::new()
.with_logs(Some(logs))
.with_sentry(sentry)
.with_opentelemetry(opentelemetry)
.build();

let general_config: Option<GeneralConfig> = if let Some(path) = opts.config_path {
let yaml = std::fs::read_to_string(&path).with_context(|| path.display().to_string())?;
Expand Down
2 changes: 1 addition & 1 deletion core/bin/contract-verifier/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ publish = false
[dependencies]
zksync_dal.workspace = true
zksync_env_config.workspace = true
zksync_config.workspace = true
zksync_config = { workspace = true, features = ["observability_ext"] }
zksync_contract_verifier_lib.workspace = true
zksync_queued_job_processor.workspace = true
zksync_utils.workspace = true
Expand Down
19 changes: 1 addition & 18 deletions core/bin/contract-verifier/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,25 +146,8 @@ async fn main() -> anyhow::Result<()> {
let observability_config = general_config
.observability
.context("ObservabilityConfig")?;
let log_format: zksync_vlog::LogFormat = observability_config
.log_format
.parse()
.context("Invalid log format")?;
let mut builder = zksync_vlog::ObservabilityBuilder::new().with_log_format(log_format);
if let Some(sentry_url) = &observability_config.sentry_url {
builder = builder
.with_sentry_url(sentry_url)
.expect("Invalid Sentry URL")
.with_sentry_environment(observability_config.sentry_environment);
}
let _guard = builder.build();

// Report whether sentry is running after the logging subsystem was initialized.
if let Some(sentry_url) = observability_config.sentry_url {
tracing::info!("Sentry configured with URL: {sentry_url}");
} else {
tracing::info!("No sentry URL was provided");
}
let _observability_guard = observability_config.install()?;

let (stop_sender, stop_receiver) = watch::channel(false);
let (stop_signal_sender, mut stop_signal_receiver) = mpsc::channel(256);
Expand Down
36 changes: 17 additions & 19 deletions core/bin/external_node/src/config/observability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{collections::HashMap, time::Duration};
use anyhow::Context as _;
use serde::Deserialize;
use zksync_config::configs::GeneralConfig;
use zksync_vlog::{prometheus::PrometheusExporterConfig, LogFormat};
use zksync_vlog::{logs::LogFormat, prometheus::PrometheusExporterConfig};

use super::{ConfigurationSource, Environment};

Expand Down Expand Up @@ -81,26 +81,24 @@ impl ObservabilityENConfig {
}

pub fn build_observability(&self) -> anyhow::Result<zksync_vlog::ObservabilityGuard> {
let mut builder = zksync_vlog::ObservabilityBuilder::new().with_log_format(self.log_format);
if let Some(log_directives) = self.log_directives.clone() {
builder = builder.with_log_directives(log_directives)
};
let logs = zksync_vlog::Logs::from(self.log_format)
.with_log_directives(self.log_directives.clone());

// Some legacy deployments use `unset` as an equivalent of `None`.
let sentry_url = self.sentry_url.as_deref().filter(|&url| url != "unset");
if let Some(sentry_url) = sentry_url {
builder = builder
.with_sentry_url(sentry_url)
.context("Invalid Sentry URL")?
.with_sentry_environment(self.sentry_environment.clone());
}
let guard = builder.build();

// Report whether sentry is running after the logging subsystem was initialized.
if let Some(sentry_url) = sentry_url {
tracing::info!("Sentry configured with URL: {sentry_url}");
} else {
tracing::info!("No sentry URL was provided");
}
let sentry = sentry_url
.map(|url| {
anyhow::Ok(
zksync_vlog::Sentry::new(url)
.context("Invalid Sentry URL")?
.with_environment(self.sentry_environment.clone()),
)
})
.transpose()?;
let guard = zksync_vlog::ObservabilityBuilder::new()
.with_logs(Some(logs))
.with_sentry(sentry)
.build();
Ok(guard)
}

Expand Down
6 changes: 3 additions & 3 deletions core/bin/external_node/src/config/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,20 @@ fn parsing_observability_config() {
assert_eq!(config.prometheus_port, Some(3322));
assert_eq!(config.sentry_url.unwrap(), "https://example.com/");
assert_eq!(config.sentry_environment.unwrap(), "mainnet - mainnet2");
assert_matches!(config.log_format, zksync_vlog::LogFormat::Plain);
assert_matches!(config.log_format, zksync_vlog::logs::LogFormat::Plain);
assert_eq!(config.prometheus_push_interval_ms, 10_000);

env_vars.0.insert("MISC_LOG_FORMAT", "json");
let config = ObservabilityENConfig::new(&env_vars).unwrap();
assert_matches!(config.log_format, zksync_vlog::LogFormat::Json);
assert_matches!(config.log_format, zksync_vlog::logs::LogFormat::Json);

// If both the canonical and obsolete vars are specified, the canonical one should prevail.
env_vars.0.insert("EN_LOG_FORMAT", "plain");
env_vars
.0
.insert("EN_SENTRY_URL", "https://example.com/new");
let config = ObservabilityENConfig::new(&env_vars).unwrap();
assert_matches!(config.log_format, zksync_vlog::LogFormat::Plain);
assert_matches!(config.log_format, zksync_vlog::logs::LogFormat::Plain);
assert_eq!(config.sentry_url.unwrap(), "https://example.com/new");
}

Expand Down
4 changes: 3 additions & 1 deletion core/bin/external_node/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,8 @@ async fn main() -> anyhow::Result<()> {
if !opt.enable_consensus {
config.consensus = None;
}
// Note: when old code will be removed, observability must be build within
// tokio context.
let _guard = config.observability.build_observability()?;

// Build L1 and L2 clients.
Expand Down Expand Up @@ -856,7 +858,7 @@ async fn main() -> anyhow::Result<()> {
// We run the node from a different thread, since the current thread is in tokio context.
std::thread::spawn(move || {
let node =
ExternalNodeBuilder::new(config).build(opt.components.0.into_iter().collect())?;
ExternalNodeBuilder::new(config)?.build(opt.components.0.into_iter().collect())?;
node.run()?;
anyhow::Ok(())
})
Expand Down
10 changes: 5 additions & 5 deletions core/bin/external_node/src/node_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ pub(crate) struct ExternalNodeBuilder {
}

impl ExternalNodeBuilder {
pub fn new(config: ExternalNodeConfig) -> Self {
Self {
node: ZkStackServiceBuilder::new(),
pub fn new(config: ExternalNodeConfig) -> anyhow::Result<Self> {
Ok(Self {
node: ZkStackServiceBuilder::new().context("Cannot create ZkStackServiceBuilder")?,
config,
}
})
}

fn add_sigint_handler_layer(mut self) -> anyhow::Result<Self> {
Expand Down Expand Up @@ -587,7 +587,7 @@ impl ExternalNodeBuilder {
}
}

Ok(self.node.build()?)
Ok(self.node.build())
}
}

Expand Down
8 changes: 4 additions & 4 deletions core/bin/external_node/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ async fn external_node_basics(components_str: &'static str) {

let node_handle = tokio::task::spawn_blocking(move || {
std::thread::spawn(move || {
let mut node = ExternalNodeBuilder::new(env.config);
let mut node = ExternalNodeBuilder::new(env.config)?;
inject_test_layers(
&mut node,
env.sigint_receiver,
Expand Down Expand Up @@ -97,7 +97,7 @@ async fn node_reacts_to_stop_signal_during_initial_reorg_detection() {

let mut node_handle = tokio::task::spawn_blocking(move || {
std::thread::spawn(move || {
let mut node = ExternalNodeBuilder::new(env.config);
let mut node = ExternalNodeBuilder::new(env.config)?;
inject_test_layers(
&mut node,
env.sigint_receiver,
Expand Down Expand Up @@ -133,7 +133,7 @@ async fn running_tree_without_core_is_not_allowed() {

let node_handle = tokio::task::spawn_blocking(move || {
std::thread::spawn(move || {
let mut node = ExternalNodeBuilder::new(env.config);
let mut node = ExternalNodeBuilder::new(env.config)?;
inject_test_layers(
&mut node,
env.sigint_receiver,
Expand Down Expand Up @@ -170,7 +170,7 @@ async fn running_tree_api_without_tree_is_not_allowed() {

let node_handle = tokio::task::spawn_blocking(move || {
std::thread::spawn(move || {
let mut node = ExternalNodeBuilder::new(env.config);
let mut node = ExternalNodeBuilder::new(env.config)?;
inject_test_layers(
&mut node,
env.sigint_receiver,
Expand Down
2 changes: 1 addition & 1 deletion core/bin/merkle_tree_consistency_checker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ categories.workspace = true
publish = false

[dependencies]
zksync_config.workspace = true
zksync_config = { workspace = true, features = ["observability_ext"] }
zksync_env_config.workspace = true
zksync_merkle_tree.workspace = true
zksync_types.workspace = true
Expand Down
13 changes: 1 addition & 12 deletions core/bin/merkle_tree_consistency_checker/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,7 @@ impl Cli {
fn main() -> anyhow::Result<()> {
let observability_config =
ObservabilityConfig::from_env().context("ObservabilityConfig::from_env()")?;
let log_format: zksync_vlog::LogFormat = observability_config
.log_format
.parse()
.context("Invalid log format")?;
let mut builder = zksync_vlog::ObservabilityBuilder::new().with_log_format(log_format);
if let Some(sentry_url) = observability_config.sentry_url {
builder = builder
.with_sentry_url(&sentry_url)
.context("Invalid Sentry URL")?
.with_sentry_environment(observability_config.sentry_environment);
}
let _guard = builder.build();
let _observability_guard = observability_config.install()?;

let db_config = DBConfig::from_env().context("DBConfig::from_env()")?;
Cli::parse().run(&db_config)
Expand Down
2 changes: 1 addition & 1 deletion core/bin/snapshots_creator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ publish = false

[dependencies]
vise.workspace = true
zksync_config.workspace = true
zksync_config = { workspace = true, features = ["observability_ext"] }
zksync_dal.workspace = true
zksync_env_config.workspace = true
zksync_types.workspace = true
Expand Down
14 changes: 1 addition & 13 deletions core/bin/snapshots_creator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,9 @@ async fn main() -> anyhow::Result<()> {
let observability_config = general_config
.observability
.context("observability config")?;
let log_format: zksync_vlog::LogFormat = observability_config
.log_format
.parse()
.context("Invalid log format")?;

let _observability_guard = observability_config.install()?;
let prometheus_exporter_task =
maybe_enable_prometheus_metrics(general_config.prometheus_config, stop_receiver).await?;
let mut builder = zksync_vlog::ObservabilityBuilder::new().with_log_format(log_format);
if let Some(sentry_url) = observability_config.sentry_url {
builder = builder
.with_sentry_url(&sentry_url)
.context("Invalid Sentry URL")?
.with_sentry_environment(observability_config.sentry_environment);
}
let _guard = builder.build();
tracing::info!("Starting snapshots creator");

let creator_config = general_config
Expand Down
2 changes: 1 addition & 1 deletion core/bin/zksync_server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ categories.workspace = true
publish = false

[dependencies]
zksync_config.workspace = true
zksync_config = { workspace = true, features = ["observability_ext"] }
zksync_env_config.workspace = true
zksync_eth_client.workspace = true
zksync_protobuf_config.workspace = true
Expand Down
42 changes: 11 additions & 31 deletions core/bin/zksync_server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,36 +113,6 @@ fn main() -> anyhow::Result<()> {
}
};

let observability_config = configs
.observability
.clone()
.context("observability config")?;

let log_format: zksync_vlog::LogFormat = observability_config
.log_format
.parse()
.context("Invalid log format")?;

let mut builder = zksync_vlog::ObservabilityBuilder::new().with_log_format(log_format);
if let Some(log_directives) = observability_config.log_directives {
builder = builder.with_log_directives(log_directives);
}

if let Some(sentry_url) = &observability_config.sentry_url {
builder = builder
.with_sentry_url(sentry_url)
.expect("Invalid Sentry URL")
.with_sentry_environment(observability_config.sentry_environment);
}
let _guard = builder.build();

// Report whether sentry is running after the logging subsystem was initialized.
if let Some(sentry_url) = observability_config.sentry_url {
tracing::info!("Sentry configured with URL: {sentry_url}");
} else {
tracing::info!("No sentry URL was provided");
}

let wallets = match opt.wallets_path {
None => tmp_config.wallets(),
Some(path) => {
Expand Down Expand Up @@ -186,8 +156,18 @@ fn main() -> anyhow::Result<()> {
.context("failed decoding genesis YAML config")?
}
};
let observability_config = configs
.observability
.clone()
.context("observability config")?;

let node = MainNodeBuilder::new(configs, wallets, genesis, contracts_config, secrets);
let node = MainNodeBuilder::new(configs, wallets, genesis, contracts_config, secrets)?;

let _observability_guard = {
// Observability initialization should be performed within tokio context.
let _context_guard = node.runtime_handle().enter();
observability_config.install()?
};

if opt.genesis {
// If genesis is requested, we don't need to run the node.
Expand Down
Loading

0 comments on commit c0815cd

Please sign in to comment.