Skip to content

Commit

Permalink
PVF worker: switch on seccomp networking restrictions (#2221)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcnski authored Nov 21, 2023
1 parent 40afc77 commit 552be48
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 146 deletions.
7 changes: 4 additions & 3 deletions polkadot/node/core/pvf/common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ pub enum PrepareError {
#[codec(index = 9)]
ClearWorkerDir(String),
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
JobDied(String),
JobDied { err: String, job_pid: i32 },
#[codec(index = 10)]
/// Some error occurred when interfacing with the kernel.
#[codec(index = 11)]
Expand All @@ -96,7 +96,7 @@ impl PrepareError {
match self {
Prevalidation(_) | Preparation(_) | JobError(_) | OutOfMemory => true,
IoErr(_) |
JobDied(_) |
JobDied { .. } |
CreateTmpFile(_) |
RenameTmpFile { .. } |
ClearWorkerDir(_) |
Expand All @@ -119,7 +119,8 @@ impl fmt::Display for PrepareError {
JobError(err) => write!(f, "panic: {}", err),
TimedOut => write!(f, "prepare: timeout"),
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
JobDied(err) => write!(f, "prepare: prepare job died: {}", err),
JobDied { err, job_pid } =>
write!(f, "prepare: prepare job with pid {job_pid} died: {err}"),
CreateTmpFile(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFile { err, src, dest } =>
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/common/src/execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub enum WorkerResponse {
///
/// We cannot treat this as an internal error because malicious code may have killed the job.
/// We still retry it, because in the non-malicious case it is likely spurious.
JobDied(String),
JobDied { err: String, job_pid: i32 },
/// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
/// etc.
///
Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub struct SecurityStatus {
pub can_enable_landlock: bool,
/// Whether the seccomp features we use are fully available on this system.
pub can_enable_seccomp: bool,
// Whether we are able to unshare the user namespace and change the filesystem root.
/// Whether we are able to unshare the user namespace and change the filesystem root.
pub can_unshare_user_namespace_and_change_root: bool,
}

Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ pub fn run_worker<F>(
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
node_version: Option<&str>,
worker_version: Option<&str>,
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))] security_status: &SecurityStatus,
security_status: &SecurityStatus,
mut event_loop: F,
) where
F: FnMut(UnixStream, PathBuf) -> io::Result<Never>,
Expand Down
10 changes: 4 additions & 6 deletions polkadot/node/core/pvf/common/src/worker/security/seccomp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,9 @@
//!
//! # Action on syscall violations
//!
//! On syscall violations we currently only log, to make sure this works correctly before enforcing.
//!
//! In the future, when a forbidden syscall is attempted we immediately kill the process in order to
//! prevent the attacker from doing anything else. In execution, this will result in voting against
//! the candidate.
//! When a forbidden syscall is attempted we immediately kill the process in order to prevent the
//! attacker from doing anything else. In execution, this will result in voting against the
//! candidate.

use crate::{
worker::{stringify_panic_payload, WorkerKind},
Expand All @@ -82,7 +80,7 @@ use std::{collections::BTreeMap, path::Path};

/// The action to take on caught syscalls.
#[cfg(not(test))]
const CAUGHT_ACTION: SeccompAction = SeccompAction::Log;
const CAUGHT_ACTION: SeccompAction = SeccompAction::KillProcess;
/// Don't kill the process when testing.
#[cfg(test)]
const CAUGHT_ACTION: SeccompAction = SeccompAction::Errno(libc::EACCES as u32);
Expand Down
26 changes: 13 additions & 13 deletions polkadot/node/core/pvf/execute-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,6 @@ pub fn worker_entrypoint(
},
};

gum::trace!(
target: LOG_TARGET,
%worker_pid,
"worker: sending response to host: {:?}",
response
);
send_response(&mut stream, response)?;
}
},
Expand Down Expand Up @@ -360,7 +354,7 @@ fn handle_child_process(
/// - The response, either `Ok` or some error state.
fn handle_parent_process(
mut pipe_read: PipeReader,
child: Pid,
job_pid: Pid,
worker_pid: u32,
usage_before: Usage,
timeout: Duration,
Expand All @@ -373,10 +367,11 @@ fn handle_parent_process(
// Should retry at any rate.
.map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?;

let status = nix::sys::wait::waitpid(child, None);
let status = nix::sys::wait::waitpid(job_pid, None);
gum::trace!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"execute worker received wait status from job: {:?}",
status,
);
Expand All @@ -396,6 +391,7 @@ fn handle_parent_process(
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"execute job took {}ms cpu time, exceeded execute timeout {}ms",
cpu_tv.as_millis(),
timeout.as_millis(),
Expand Down Expand Up @@ -428,6 +424,7 @@ fn handle_parent_process(
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"execute job error: {}",
job_error,
);
Expand All @@ -443,15 +440,18 @@ fn handle_parent_process(
//
// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
// other reason, so we still need to check for seccomp violations elsewhere.
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) =>
Ok(WorkerResponse::JobDied(format!("received signal: {signal:?}"))),
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Ok(WorkerResponse::JobDied {
err: format!("received signal: {signal:?}"),
job_pid: job_pid.as_raw(),
}),
Err(errno) => Ok(internal_error_from_errno("waitpid", errno)),

// It is within an attacker's power to send an unexpected exit status. So we cannot treat
// this as an internal error (which would make us abstain), but must vote against.
Ok(unexpected_wait_status) => Ok(WorkerResponse::JobDied(format!(
"unexpected status from wait: {unexpected_wait_status:?}"
))),
Ok(unexpected_wait_status) => Ok(WorkerResponse::JobDied {
err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
job_pid: job_pid.as_raw(),
}),
}
}

Expand Down
24 changes: 15 additions & 9 deletions polkadot/node/core/pvf/prepare-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ pub fn worker_entrypoint(

handle_parent_process(
pipe_reader,
worker_pid,
child,
temp_artifact_dest.clone(),
worker_pid,
usage_before,
preparation_timeout,
)
Expand Down Expand Up @@ -506,9 +506,9 @@ fn handle_child_process(
/// - If the child process timeout, it returns `PrepareError::TimedOut`.
fn handle_parent_process(
mut pipe_read: PipeReader,
child: Pid,
temp_artifact_dest: PathBuf,
worker_pid: u32,
job_pid: Pid,
temp_artifact_dest: PathBuf,
usage_before: Usage,
timeout: Duration,
) -> Result<PrepareWorkerSuccess, PrepareError> {
Expand All @@ -518,10 +518,11 @@ fn handle_parent_process(
.read_to_end(&mut received_data)
.map_err(|err| PrepareError::IoErr(err.to_string()))?;

let status = nix::sys::wait::waitpid(child, None);
let status = nix::sys::wait::waitpid(job_pid, None);
gum::trace!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"prepare worker received wait status from job: {:?}",
status,
);
Expand All @@ -539,6 +540,7 @@ fn handle_parent_process(
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"prepare job took {}ms cpu time, exceeded prepare timeout {}ms",
cpu_tv.as_millis(),
timeout.as_millis(),
Expand Down Expand Up @@ -573,6 +575,7 @@ fn handle_parent_process(
gum::debug!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"worker: writing artifact to {}",
temp_artifact_dest.display(),
);
Expand All @@ -593,15 +596,18 @@ fn handle_parent_process(
//
// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
// other reason, so we still need to check for seccomp violations elsewhere.
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) =>
Err(PrepareError::JobDied(format!("received signal: {signal:?}"))),
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Err(PrepareError::JobDied {
err: format!("received signal: {signal:?}"),
job_pid: job_pid.as_raw(),
}),
Err(errno) => Err(error_from_errno("waitpid", errno)),

// An attacker can make the child process return any exit status it wants. So we can treat
// all unexpected cases the same way.
Ok(unexpected_wait_status) => Err(PrepareError::JobDied(format!(
"unexpected status from wait: {unexpected_wait_status:?}"
))),
Ok(unexpected_wait_status) => Err(PrepareError::JobDied {
err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
job_pid: job_pid.as_raw(),
}),
}
}

Expand Down
111 changes: 62 additions & 49 deletions polkadot/node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use polkadot_node_core_pvf_common::{
execute::{Handshake, WorkerResponse},
worker_dir, SecurityStatus,
};
use polkadot_parachain_primitives::primitives::ValidationResult;
use polkadot_parachain_primitives::primitives::{ValidationCodeHash, ValidationResult};
use polkadot_primitives::ExecutorParams;
use std::{path::Path, time::Duration};
use tokio::{io, net::UnixStream};
Expand Down Expand Up @@ -156,6 +156,16 @@ pub async fn start_work(
let response = futures::select! {
response = recv_response(&mut stream).fuse() => {
match response {
Ok(response) =>
handle_response(
response,
pid,
&artifact.id.code_hash,
&artifact_path,
execution_timeout,
audit_log_file
)
.await,
Err(error) => {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -164,56 +174,9 @@ pub async fn start_work(
?error,
"failed to recv an execute response",
);
// The worker died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
target: LOG_TARGET,
worker_pid = %pid,
%syscall,
validation_code_hash = ?artifact.id.code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}

return Outcome::WorkerIntfErr
},
Ok(response) => {
// Check if any syscall violations occurred during the job. For now this is
// only informative, as we are not enforcing the seccomp policy yet.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
target: LOG_TARGET,
worker_pid = %pid,
%syscall,
validation_code_hash = ?artifact.id.code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}

if let WorkerResponse::Ok{duration, ..} = response {
if duration > execution_timeout {
// The job didn't complete within the timeout.
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
duration.as_millis(),
execution_timeout.as_millis(),
);

// Return a timeout error.
return Outcome::HardTimeout
}
}

response
},
}
},
_ = Delay::new(timeout).fuse() => {
Expand All @@ -238,7 +201,7 @@ pub async fn start_work(
idle_worker: IdleWorker { stream, pid, worker_dir },
},
WorkerResponse::JobTimedOut => Outcome::HardTimeout,
WorkerResponse::JobDied(err) => Outcome::JobDied { err },
WorkerResponse::JobDied { err, job_pid: _ } => Outcome::JobDied { err },
WorkerResponse::JobError(err) => Outcome::JobError { err },

WorkerResponse::InternalError(err) => Outcome::InternalError { err },
Expand All @@ -247,6 +210,56 @@ pub async fn start_work(
.await
}

/// Handles the case where we successfully received response bytes on the host from the child.
///
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
/// by [`with_worker_dir_setup`].
async fn handle_response(
response: WorkerResponse,
worker_pid: u32,
validation_code_hash: &ValidationCodeHash,
artifact_path: &Path,
execution_timeout: Duration,
audit_log_file: Option<security::AuditLogFile>,
) -> WorkerResponse {
if let WorkerResponse::Ok { duration, .. } = response {
if duration > execution_timeout {
// The job didn't complete within the timeout.
gum::warn!(
target: LOG_TARGET,
worker_pid,
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
duration.as_millis(),
execution_timeout.as_millis(),
);

// Return a timeout error.
return WorkerResponse::JobTimedOut
}
}

if let WorkerResponse::JobDied { err: _, job_pid } = response {
// The job died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in security::check_seccomp_violations_for_job(audit_log_file, job_pid).await {
gum::error!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
%syscall,
?validation_code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
}

response
}

/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
/// passing the file path in, and clean up the worker cache.
///
Expand Down
4 changes: 2 additions & 2 deletions polkadot/node/core/pvf/src/prepare/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,14 +388,14 @@ fn handle_mux(
Ok(())
},
// The worker might still be usable, but we kill it just in case.
Outcome::JobDied(err) => {
Outcome::JobDied { err, job_pid } => {
if attempt_retire(metrics, spawned, worker) {
reply(
from_pool,
FromPool::Concluded {
worker,
rip: true,
result: Err(PrepareError::JobDied(err)),
result: Err(PrepareError::JobDied { err, job_pid }),
},
)?;
}
Expand Down
Loading

0 comments on commit 552be48

Please sign in to comment.