Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce subsystem benchmarking tool #2528

Merged
merged 56 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
01af630
skeleton
sandreim Oct 25, 2023
7c22abe
wip
sandreim Nov 6, 2023
c3adc77
measure tput and fixes
sandreim Nov 6, 2023
31b0351
add network emulation
sandreim Nov 7, 2023
e4bb037
cleanup
sandreim Nov 7, 2023
a694924
Add latency emulation
sandreim Nov 7, 2023
7ca4dba
support multiple pov sizes
sandreim Nov 8, 2023
0430b5b
new metric in recovery and more testing
sandreim Nov 8, 2023
027bcd8
CLI update and fixes
sandreim Nov 9, 2023
5a05da0
peer stats
sandreim Nov 9, 2023
895e8d6
Switch stats to atomics
sandreim Nov 10, 2023
a2fb0c9
add more network metrics, new load generator
sandreim Nov 12, 2023
d1b9fa3
refactor
sandreim Nov 14, 2023
c5937ab
pretty cli + minor refactor + remove unused
sandreim Nov 15, 2023
d6c259d
update
sandreim Nov 15, 2023
050529b
remove comment
sandreim Nov 15, 2023
cb38be5
separate cli options for availability
sandreim Nov 17, 2023
24a736a
implement unified and extensible configuration
sandreim Nov 17, 2023
2843865
Prepare to swtich to overseer
sandreim Nov 24, 2023
fd4620e
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Nov 24, 2023
b17a147
add mocked subsystems
sandreim Nov 27, 2023
4724d8c
full overseer based implementation complete
sandreim Nov 27, 2023
7aed30f
make clean
sandreim Nov 27, 2023
b51485b
more cleaning
sandreim Nov 27, 2023
7e46444
more cleaning
sandreim Nov 27, 2023
d3df927
proper overseer control
sandreim Nov 27, 2023
7557768
refactor CLI display of env stats
sandreim Nov 27, 2023
787dc00
Add grafana dashboards for DA read
sandreim Nov 28, 2023
cd18f8d
network stats fixes
sandreim Nov 28, 2023
e8506b3
move examples and grafana
sandreim Nov 28, 2023
cbb6772
Add readme
sandreim Nov 28, 2023
1a80870
fmt + readme updates
sandreim Nov 28, 2023
eb49ea0
update dashboard and sample
sandreim Nov 28, 2023
b249056
remove unused
sandreim Nov 28, 2023
7fbcdfc
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Nov 28, 2023
fb34181
revert unneeded changes
sandreim Nov 28, 2023
3a716a5
add missing comments and minor fixes
sandreim Nov 29, 2023
a092b76
clippy
sandreim Nov 29, 2023
ca27370
zepter format features --fix
sandreim Nov 29, 2023
be814e5
fix markdown
sandreim Nov 29, 2023
11ce8f5
remove sleep till end of block
sandreim Nov 29, 2023
8d93abc
review
sandreim Nov 29, 2023
af141ee
Emulated network improvements
sandreim Dec 1, 2023
29d80fa
fix comment
sandreim Dec 1, 2023
74e68bb
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Dec 8, 2023
4d21e5b
cargo lock
sandreim Dec 8, 2023
3e25fdc
more review feedback
sandreim Dec 8, 2023
1458a73
change back to debug
sandreim Dec 8, 2023
baa124e
fix test build
sandreim Dec 12, 2023
fde982f
fix markdown
sandreim Dec 12, 2023
47c2643
fix test
sandreim Dec 13, 2023
8b49077
taplo fix
sandreim Dec 13, 2023
42f6834
Merge branch 'master' of github.com:paritytech/polkadot-sdk into sand…
sandreim Dec 13, 2023
4c86691
cargo lock
sandreim Dec 13, 2023
bd128b3
clippy
sandreim Dec 14, 2023
1021efb
more clippy
sandreim Dec 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 90 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ members = [
"polkadot/node/primitives",
"polkadot/node/service",
"polkadot/node/subsystem",
"polkadot/node/subsystem-bench",
"polkadot/node/subsystem-test-helpers",
"polkadot/node/subsystem-types",
"polkadot/node/subsystem-util",
Expand Down
4 changes: 4 additions & 0 deletions polkadot/node/network/availability-recovery/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ workspace = true

[dependencies]
futures = "0.3.21"
tokio = "1.24.2"
schnellru = "0.2.1"
rand = "0.8.5"
fatality = "0.0.6"
Expand Down Expand Up @@ -40,3 +41,6 @@ sc-network = { path = "../../../../substrate/client/network" }

polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }
polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" }

[features]
subsystem-benchmarks = []
13 changes: 10 additions & 3 deletions polkadot/node/network/availability-recovery/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ mod error;
mod futures_undead;
mod metrics;
mod task;
use metrics::Metrics;
pub use metrics::Metrics;

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -603,7 +603,8 @@ impl AvailabilityRecoverySubsystem {
}
}

async fn run<Context>(self, mut ctx: Context) -> SubsystemResult<()> {
/// Starts the inner subsystem loop.
pub async fn run<Context>(self, mut ctx: Context) -> SubsystemResult<()> {
let mut state = State::default();
let Self {
mut req_receiver,
Expand Down Expand Up @@ -681,6 +682,7 @@ impl AvailabilityRecoverySubsystem {
&mut state,
signal,
).await? {
gum::debug!(target: LOG_TARGET, "subsystem concluded");
return Ok(());
}
FromOrchestra::Communication { msg } => {
Expand Down Expand Up @@ -845,12 +847,17 @@ async fn erasure_task_thread(
let _ = sender.send(maybe_data);
},
None => {
gum::debug!(
gum::trace!(
target: LOG_TARGET,
"Erasure task channel closed. Node shutting down ?",
);
break
},
}

// In benchmarks this is a very hot loop not yielding at all.
// To update CPU metrics for the task we need to yield.
#[cfg(feature = "subsystem-benchmarks")]
tokio::task::yield_now().await;
}
}
17 changes: 14 additions & 3 deletions polkadot/node/network/availability-recovery/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ struct MetricsInner {
///
/// Gets incremented on each sent chunk request.
chunk_requests_issued: Counter<U64>,

/// Total number of bytes recovered
///
/// Gets incremented on each successful recovery
recovered_bytes_total: Counter<U64>,
/// A counter for finished chunk requests.
///
/// Split by result:
Expand Down Expand Up @@ -133,9 +136,10 @@ impl Metrics {
}

/// A full recovery succeeded.
pub fn on_recovery_succeeded(&self) {
pub fn on_recovery_succeeded(&self, bytes: usize) {
if let Some(metrics) = &self.0 {
metrics.full_recoveries_finished.with_label_values(&["success"]).inc()
metrics.full_recoveries_finished.with_label_values(&["success"]).inc();
metrics.recovered_bytes_total.inc_by(bytes as u64)
}
}

Expand Down Expand Up @@ -171,6 +175,13 @@ impl metrics::Metrics for Metrics {
)?,
registry,
)?,
recovered_bytes_total: prometheus::register(
Counter::new(
"polkadot_parachain_availability_recovery_bytes_total",
"Total number of bytes recovered",
)?,
registry,
)?,
chunk_requests_finished: prometheus::register(
CounterVec::new(
Opts::new(
Expand Down
3 changes: 2 additions & 1 deletion polkadot/node/network/availability-recovery/src/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use crate::{
PostRecoveryCheck, LOG_TARGET,
};
use futures::{channel::oneshot, SinkExt};
use parity_scale_codec::Encode;
#[cfg(not(test))]
use polkadot_node_network_protocol::request_response::CHUNK_REQUEST_TIMEOUT;
use polkadot_node_network_protocol::request_response::{
Expand Down Expand Up @@ -432,7 +433,7 @@ where
return Err(err)
},
Ok(data) => {
self.params.metrics.on_recovery_succeeded();
self.params.metrics.on_recovery_succeeded(data.encoded_size());
return Ok(data)
},
}
Expand Down
29 changes: 1 addition & 28 deletions polkadot/node/network/availability-recovery/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ use parity_scale_codec::Encode;
use polkadot_node_network_protocol::request_response::{
self as req_res, IncomingRequest, Recipient, ReqProtocolNames, Requests,
};
use polkadot_node_subsystem_test_helpers::derive_erasure_chunks_with_proofs_and_root;

use super::*;

use sc_network::{config::RequestResponseConfig, IfDisconnected, OutboundFailure, RequestFailure};

use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
use polkadot_node_primitives::{BlockData, PoV, Proof};
use polkadot_node_subsystem::messages::{
AllMessages, NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
Expand Down Expand Up @@ -456,33 +456,6 @@ fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec<AuthorityDiscoveryI
val_ids.iter().map(|v| v.public().into()).collect()
}

/// Test helper: derives erasure chunks from `available_data` and pairs each one with its proof.
///
/// The raw chunks are produced by `obtain_chunks(n_validators, available_data)`. Before any
/// proof is built, `alter_chunk` is called once per chunk with the chunk's position and a
/// mutable reference to its bytes, so a caller can modify individual chunks.
///
/// Returns the resulting `ErasureChunk`s together with the root reported by `branches`.
///
/// # Panics
///
/// Panics if `obtain_chunks` returns an error, or if a proof yielded by `branches` cannot be
/// converted into a `Proof`.
fn derive_erasure_chunks_with_proofs_and_root(
n_validators: usize,
available_data: &AvailableData,
alter_chunk: impl Fn(usize, &mut Vec<u8>),
) -> (Vec<ErasureChunk>, Hash) {
let mut chunks: Vec<Vec<u8>> = obtain_chunks(n_validators, available_data).unwrap();

// Give the caller a chance to modify every chunk before the proofs are computed.
for (i, chunk) in chunks.iter_mut().enumerate() {
alter_chunk(i, chunk)
}

// create proofs for each erasure chunk
let branches = branches(chunks.as_ref());

// Read the root first: `branches` is moved into the iterator chain below.
let root = branches.root();
let erasure_chunks = branches
.enumerate()
.map(|(index, (proof, chunk))| ErasureChunk {
chunk: chunk.to_vec(),
// The chunk's position in the iteration is used as its validator index.
index: ValidatorIndex(index as _),
proof: Proof::try_from(proof).unwrap(),
})
.collect::<Vec<ErasureChunk>>();

(erasure_chunks, root)
}

impl Default for TestState {
fn default() -> Self {
let validators = vec![
Expand Down
2 changes: 2 additions & 0 deletions polkadot/node/overseer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ impl From<FinalityNotification<Block>> for BlockInfo {

/// An event from outside the overseer scope, such
/// as the substrate framework or user interaction.
#[derive(Debug)]
pub enum Event {
/// A new block was imported.
///
Expand All @@ -300,6 +301,7 @@ pub enum Event {
}

/// Some request from outer world.
#[derive(Debug)]
pub enum ExternalRequest {
/// Wait for the activation of a particular hash
/// and be notified by means of the return channel.
Expand Down
61 changes: 61 additions & 0 deletions polkadot/node/subsystem-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
[package]
name = "polkadot-subsystem-bench"
description = "Subsystem performance benchmark client"
version = "1.0.0"
authors.workspace = true
edition.workspace = true
license.workspace = true
readme = "README.md"
publish = false

[[bin]]
name = "subsystem-bench"
path = "src/subsystem-bench.rs"

# Prevent rustdoc error. Already documented from top-level Cargo.toml.
doc = false

[dependencies]
polkadot-node-subsystem = { path = "../subsystem" }
polkadot-node-subsystem-util = { path = "../subsystem-util" }
polkadot-node-subsystem-types = { path = "../subsystem-types" }
polkadot-node-primitives = { path = "../primitives" }
polkadot-primitives = { path = "../../primitives" }
polkadot-node-network-protocol = { path = "../network/protocol" }
polkadot-availability-recovery = { path = "../network/availability-recovery", features = ["subsystem-benchmarks"] }
color-eyre = { version = "0.6.1", default-features = false }
polkadot-overseer = { path = "../overseer" }
colored = "2.0.4"
assert_matches = "1.5"
async-trait = "0.1.57"
sp-keystore = { path = "../../../substrate/primitives/keystore" }
sc-keystore = { path = "../../../substrate/client/keystore" }
sp-core = { path = "../../../substrate/primitives/core" }
clap = { version = "4.4.6", features = ["derive"] }
futures = "0.3.21"
futures-timer = "3.0.2"
gum = { package = "tracing-gum", path = "../gum" }
polkadot-erasure-coding = { package = "polkadot-erasure-coding", path = "../../erasure-coding" }
log = "0.4.17"
env_logger = "0.9.0"
rand = "0.8.5"
parity-scale-codec = { version = "3.6.1", features = ["derive", "std"] }
tokio = "1.24.2"
clap-num = "1.0.2"
polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" }
sp-keyring = { path = "../../../substrate/primitives/keyring" }
sp-application-crypto = { path = "../../../substrate/primitives/application-crypto" }
sc-network = { path = "../../../substrate/client/network" }
sc-service = { path = "../../../substrate/client/service" }
polkadot-node-metrics = { path = "../metrics" }
itertools = "0.11.0"
polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" }
prometheus_endpoint = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" }
prometheus = { version = "0.13.0", default-features = false }
serde = "1.0.192"
serde_yaml = "0.9"
paste = "1.0.14"
orchestra = { version = "0.3.3", default-features = false, features = ["futures_channel"] }

[features]
default = []
Loading