Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PVF: Add worker check during tests and benches #1771

Merged
merged 16 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion polkadot/cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl SubstrateCli for Cli {

fn impl_version() -> String {
let commit_hash = env!("SUBSTRATE_CLI_COMMIT_HASH");
format!("{NODE_VERSION}-{commit_hash}")
format!("{}-{commit_hash}", NODE_VERSION)
}

fn description() -> String {
Expand Down
13 changes: 9 additions & 4 deletions polkadot/node/core/pvf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cfg-if = "1.0"
futures = "0.3.21"
futures-timer = "3.0.2"
gum = { package = "tracing-gum", path = "../../gum" }
is_executable = "1.0.1"
libc = "0.2.139"
pin-project = "1.0.9"
rand = "0.8.5"
Expand All @@ -34,19 +35,23 @@ sp-maybe-compressed-blob = { path = "../../../../substrate/primitives/maybe-comp
polkadot-node-core-pvf-prepare-worker = { path = "prepare-worker", optional = true }
polkadot-node-core-pvf-execute-worker = { path = "execute-worker", optional = true }

[build-dependencies]
substrate-build-script-utils = { path = "../../../../substrate/utils/build-script-utils" }

[dev-dependencies]
assert_matches = "1.4.0"
criterion = { version = "0.4.0", default-features = false, features = ["cargo_bench_support", "async_tokio"] }
hex-literal = "0.4.1"
polkadot-node-core-pvf-common = { path = "common", features = ["test-utils"] }
# For the puppet worker, depend on ourselves with the test-utils feature.
# For benches and integration tests, depend on ourselves with the test-utils
# feature.
polkadot-node-core-pvf = { path = ".", features = ["test-utils"] }
rococo-runtime = { path = "../../../runtime/rococo" }

adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" }
halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" }

[[bench]]
name = "host_rococo_runtime"
harness = false

[features]
ci-only-tests = []
jemalloc-allocator = [ "polkadot-node-core-pvf-common/jemalloc-allocator" ]
Expand Down
220 changes: 220 additions & 0 deletions polkadot/node/core/pvf/benches/host_rococo_runtime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

//! Benchmarks for preparation and execution through the host.
//!
//! # Examples
//!
//! $ cargo bench
//! $ cargo bench prepare
//! $ cargo bench execute

use criterion::{criterion_group, criterion_main, BatchSize, Criterion, SamplingMode};
use parity_scale_codec::Encode;
use polkadot_node_core_pvf::{
start, testing, Config, Metrics, PrepareError, PrepareJobKind, PrepareStats, PvfPrepData,
ValidationError, ValidationHost,
};
use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult};
use polkadot_primitives::ExecutorParams;
use std::time::Duration;
use tokio::{runtime::Handle, sync::Mutex};

/// Execution timeout handed to `execute_pvf` by `TestHost::validate_candidate`.
const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
/// Preparation timeout attached to the `PvfPrepData` built by the `TestHost` helper methods.
const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);

/// Benchmark helper that owns a running [`ValidationHost`] together with the temporary
/// directory used as its artifact cache.
struct TestHost {
	/// Handle to the validation host, behind an async mutex so that it can be driven through
	/// `&self`.
	host: Mutex<ValidationHost>,
	/// Guard for the temporary cache directory. `TempDir` removes the directory when dropped,
	/// so it has to live as long as the host does. Declared after `host` so that the host
	/// handle is dropped first.
	_cache_dir: tempfile::TempDir,
}

impl TestHost {
	/// Starts a validation host on the given tokio `handle`.
	///
	/// The worker binaries are located via `testing::get_and_check_worker_paths`, a fresh
	/// temporary directory is used as the cache path, and `f` gets a chance to tweak the
	/// [`Config`] before the host is started.
	///
	/// # Panics
	///
	/// Panics if the temporary cache directory cannot be created.
	fn new_with_config<F>(handle: &Handle, f: F) -> Self
	where
		F: FnOnce(&mut Config),
	{
		let (prepare_worker_path, execute_worker_path) = testing::get_and_check_worker_paths();

		let cache_dir = tempfile::tempdir().expect("can create a temporary cache directory");
		let mut config = Config::new(
			cache_dir.path().to_owned(),
			None,
			prepare_worker_path,
			execute_worker_path,
		);
		f(&mut config);

		let (host, task) = start(config, Metrics::default());
		// The host task runs in the background for as long as the runtime lives; the join
		// handle is intentionally discarded.
		let _ = handle.spawn(task);

		// Keep the `TempDir` guard: dropping it here would delete the cache directory right
		// after the host was configured to use it.
		Self { host: Mutex::new(host), _cache_dir: cache_dir }
	}

	/// Sends a pre-checking request for `code` to the host and waits for the outcome.
	///
	/// `code` may be compressed; it is decompressed (16 MiB bomb limit) before being sent.
	///
	/// # Panics
	///
	/// Panics if decompression fails, if the request cannot be sent to the host, or if the
	/// result channel is closed without an answer.
	async fn precheck_pvf(
		&self,
		code: &[u8],
		executor_params: ExecutorParams,
	) -> Result<PrepareStats, PrepareError> {
		let (result_tx, result_rx) = futures::channel::oneshot::channel();

		let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024)
			.expect("Decompression works");

		self.host
			.lock()
			.await
			.precheck_pvf(
				PvfPrepData::from_code(
					code.into(),
					executor_params,
					TEST_PREPARATION_TIMEOUT,
					PrepareJobKind::Prechecking,
				),
				result_tx,
			)
			.await
			.unwrap();
		result_rx.await.unwrap()
	}

	/// Sends an execution request for `code` with the SCALE-encoded `params` to the host and
	/// waits for the validation result.
	///
	/// `code` may be compressed; it is decompressed (16 MiB bomb limit) before being sent.
	/// The request uses `TEST_EXECUTION_TIMEOUT` and normal priority.
	///
	/// # Panics
	///
	/// Panics if decompression fails, if the request cannot be sent to the host, or if the
	/// result channel is closed without an answer.
	async fn validate_candidate(
		&self,
		code: &[u8],
		params: ValidationParams,
		executor_params: ExecutorParams,
	) -> Result<ValidationResult, ValidationError> {
		let (result_tx, result_rx) = futures::channel::oneshot::channel();

		let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024)
			.expect("Decompression works");

		self.host
			.lock()
			.await
			.execute_pvf(
				PvfPrepData::from_code(
					code.into(),
					executor_params,
					TEST_PREPARATION_TIMEOUT,
					PrepareJobKind::Compilation,
				),
				TEST_EXECUTION_TIMEOUT,
				params.encode(),
				polkadot_node_core_pvf::Priority::Normal,
				result_tx,
			)
			.await
			.unwrap();
		result_rx.await.unwrap()
	}
}

/// Benchmarks PVF preparation of the Rococo runtime through the validation host.
///
/// Each measured iteration runs against a freshly constructed [`TestHost`] (with its own
/// temporary cache directory) limited to a single prepare worker. The host is built in the
/// setup closure of `iter_batched`, so its construction is not part of the measurement.
///
/// # Panics
///
/// Panics if the tokio runtime cannot be created, if the Rococo WASM binary is not available,
/// if the blob cannot be decompressed, or if preparation fails.
fn host_prepare_rococo_runtime(c: &mut Criterion) {
	polkadot_node_core_pvf_common::sp_tracing::try_init_simple();

	let rt = tokio::runtime::Runtime::new().unwrap();

	let blob = rococo_runtime::WASM_BINARY.unwrap();
	let code = sp_maybe_compressed_blob::decompress(blob, 64 * 1024 * 1024)
		.unwrap_or_else(|e| panic!("Cannot decompress blob: {:?}", e));
	let pvf = PvfPrepData::from_code(
		code.into_owned(),
		ExecutorParams::default(),
		Duration::from_secs(360),
		PrepareJobKind::Compilation,
	);

	let mut group = c.benchmark_group("prepare rococo");
	group.sampling_mode(SamplingMode::Flat);
	group.sample_size(20);
	group.measurement_time(Duration::from_secs(240));
	group.bench_function("host: prepare Rococo runtime", |b| {
		b.to_async(&rt).iter_batched(
			|| {
				// Setup: a new host plus a handle to the code. `code()` borrows `pvf`, so
				// there is no need to clone the whole `PvfPrepData` first.
				(
					TestHost::new_with_config(rt.handle(), |cfg| {
						cfg.prepare_workers_hard_max_num = 1;
					}),
					pvf.code(),
				)
			},
			|(host, pvf_code)| async move {
				let _stats = host.precheck_pvf(&pvf_code, Default::default()).await.unwrap();
			},
			BatchSize::SmallInput,
		)
	});
	group.finish();
}

/// Benchmarks PVF execution of the Rococo runtime through the validation host.
///
/// A single [`TestHost`] limited to one execute worker is shared by all iterations. The code
/// is sent through `precheck_pvf` once before measuring, then every iteration issues one
/// `validate_candidate` request with empty block data.
///
/// # Panics
///
/// Panics if the tokio runtime cannot be created, if the Rococo WASM binary is not available,
/// if the blob cannot be decompressed, or if preparation or any execution request fails.
fn host_execute_rococo_runtime(c: &mut Criterion) {
	polkadot_node_core_pvf_common::sp_tracing::try_init_simple();

	let rt = tokio::runtime::Runtime::new().unwrap();

	let host = TestHost::new_with_config(rt.handle(), |cfg| {
		cfg.execute_workers_max_num = 1;
	});

	let blob = rococo_runtime::WASM_BINARY.unwrap();
	let code = sp_maybe_compressed_blob::decompress(blob, 64 * 1024 * 1024)
		.unwrap_or_else(|e| panic!("Cannot decompress blob: {:?}", e));
	let pvf = PvfPrepData::from_code(
		code.into_owned(),
		ExecutorParams::default(),
		Duration::from_secs(360),
		PrepareJobKind::Compilation,
	);

	// Prepare beforehand, so that the benchmark only measures execution.
	rt.block_on(async {
		let _stats = host.precheck_pvf(&pvf.code(), Default::default()).await.unwrap();
	});

	let mut group = c.benchmark_group("execute rococo");
	group.sampling_mode(SamplingMode::Flat);
	group.sample_size(20);
	group.measurement_time(Duration::from_secs(240));
	// The id names the execution benchmark; it previously carried the "prepare" label copied
	// from the preparation benchmark.
	group.bench_function("host: execute Rococo runtime", |b| {
		b.to_async(&rt).iter(|| async {
			// `pvf.code()` is fetched on every iteration; per the original authors it is
			// designed to be cheap, so it should not affect the benchmark accuracy.
			let _result = host
				.validate_candidate(
					&pvf.code(),
					ValidationParams {
						block_data: BlockData(Vec::new()),
						parent_head: Default::default(),
						relay_parent_number: 1,
						relay_parent_storage_root: Default::default(),
					},
					Default::default(),
				)
				.await
				.unwrap();
		})
	});
	group.finish();
}

// Each benchmark function gets its own criterion group, named `prepare` and `execute`.
criterion_group!(prepare, host_prepare_rococo_runtime);
criterion_group!(execute, host_execute_rococo_runtime);
// Entry point of the benchmark binary: runs both groups.
criterion_main!(prepare, execute);
17 changes: 0 additions & 17 deletions polkadot/node/core/pvf/bin/puppet_worker.rs

This file was deleted.

1 change: 0 additions & 1 deletion polkadot/node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,5 @@ tempfile = "3.3.0"

[features]
# This feature is used to export test code to other crates without putting it in the production build.
# Also used for building the puppet worker.
test-utils = []
jemalloc-allocator = []
12 changes: 11 additions & 1 deletion polkadot/node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@ use tokio::{io, runtime::Runtime};
/// spawning the desired worker.
#[macro_export]
macro_rules! decl_worker_main {
($expected_command:expr, $entrypoint:expr, $worker_version:expr $(,)*) => {
($expected_command:expr, $entrypoint:expr, $worker_version:expr, $worker_version_hash:expr $(,)*) => {
fn get_full_version() -> String {
format!("{}-{}", $worker_version, $worker_version_hash)
}

fn print_help(expected_command: &str) {
println!("{} {}", expected_command, $worker_version);
println!("commit: {}", $worker_version_hash);
println!();
println!("PVF worker that is called by polkadot.");
}
Expand Down Expand Up @@ -67,6 +72,11 @@ macro_rules! decl_worker_main {
println!("{}", $worker_version);
return
},
// Useful for debugging. --version is used for version checks.
"--full-version" => {
println!("{}", get_full_version());
return
},

"--check-can-enable-landlock" => {
#[cfg(target_os = "linux")]
Expand Down
1 change: 1 addition & 0 deletions polkadot/node/core/pvf/src/artifacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ impl ArtifactPathId {
}
}

#[derive(Debug)]
pub enum ArtifactState {
/// The artifact is ready to be used by the executor.
///
Expand Down
10 changes: 5 additions & 5 deletions polkadot/node/core/pvf/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ async fn handle_to_host(
/// This tries to prepare the PVF by compiling the WASM blob within a timeout set in
/// `PvfPrepData`.
///
/// If the prepare job failed previously, we may retry it under certain conditions.
/// We don't retry artifacts that previously failed preparation. We don't expect multiple
/// pre-checking requests.
async fn handle_precheck_pvf(
artifacts: &mut Artifacts,
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
Expand All @@ -464,8 +465,7 @@ async fn handle_precheck_pvf(
ArtifactState::Preparing { waiting_for_response, num_failures: _ } =>
waiting_for_response.push(result_sender),
ArtifactState::FailedToProcess { error, .. } => {
// Do not retry failed preparation if another pre-check request comes in. We do not
// retry pre-checking, anyway.
// Do not retry an artifact that previously failed preparation.
let _ = result_sender.send(PrepareResult::Err(error.clone()));
},
}
Expand Down Expand Up @@ -764,7 +764,7 @@ async fn handle_prepare_done(
let last_time_failed = SystemTime::now();
let num_failures = *num_failures + 1;

gum::warn!(
gum::error!(
target: LOG_TARGET,
?artifact_id,
time_failed = ?last_time_failed,
Expand Down Expand Up @@ -846,7 +846,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
gum::trace!(
target: LOG_TARGET,
?result,
"Sweeping the artifact file {}",
"Sweeped the artifact file {}",
condemned.display(),
);
},
Expand Down
Loading