Skip to content

Commit

Permalink
Revert "fix(tee-prover): mitigate panic on redeployments (#2764)"
Browse files Browse the repository at this point in the history
This reverts commit 178b386.
  • Loading branch information
slowli committed Sep 3, 2024
1 parent 178b386 commit eb06267
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 61 deletions.
2 changes: 0 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions core/bin/zksync_tee_prover/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ publish = false
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
envy.workspace = true
reqwest.workspace = true
secp256k1 = { workspace = true, features = ["serde"] }
secp256k1.workspace = true
serde = { workspace = true, features = ["derive"] }
thiserror.workspace = true
tokio = { workspace = true, features = ["full"] }
Expand Down
35 changes: 12 additions & 23 deletions core/bin/zksync_tee_prover/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use std::{path::PathBuf, time::Duration};
use std::path::PathBuf;

use secp256k1::SecretKey;
use serde::Deserialize;
use url::Url;
use zksync_env_config::FromEnv;
use zksync_types::tee_types::TeeType;

/// Configuration for the TEE prover.
#[derive(Debug, Clone, Deserialize)]
#[derive(Debug)]
pub(crate) struct TeeProverConfig {
/// The private key used to sign the proofs.
pub signing_key: SecretKey,
Expand All @@ -17,34 +16,24 @@ pub(crate) struct TeeProverConfig {
pub tee_type: TeeType,
/// TEE proof data handler API.
pub api_url: Url,
/// Number of retries for retriable errors before giving up on recovery (i.e., returning an error
/// from [`Self::run()`]).
pub max_retries: usize,
/// Initial back-off interval when retrying recovery on a retriable error. Each subsequent retry interval
/// will be multiplied by [`Self.retry_backoff_multiplier`].
pub initial_retry_backoff: Duration,
/// Multiplier for the back-off interval when retrying recovery on a retriable error.
pub retry_backoff_multiplier: f32,
/// Maximum back-off interval when retrying recovery on a retriable error.
pub max_backoff: Duration,
}

impl FromEnv for TeeProverConfig {
/// Constructs the TEE Prover configuration from environment variables.
///
/// Example usage of environment variables for tests:
/// ```
/// export TEE_PROVER_SIGNING_KEY="b50b38c8d396c88728fc032ece558ebda96907a0b1a9340289715eef7bf29deb"
/// export TEE_PROVER_ATTESTATION_QUOTE_FILE_PATH="/tmp/test" # run `echo test > /tmp/test` beforehand
/// export TEE_PROVER_TEE_TYPE="sgx"
/// export TEE_PROVER_API_URL="http://127.0.0.1:3320"
/// export TEE_PROVER_MAX_RETRIES=10
/// export TEE_PROVER_INITIAL_RETRY_BACKOFF=1
/// export TEE_PROVER_RETRY_BACKOFF_MULTIPLIER=2.0
/// export TEE_PROVER_MAX_BACKOFF=128
/// export TEE_SIGNING_KEY="b50b38c8d396c88728fc032ece558ebda96907a0b1a9340289715eef7bf29deb"
/// export TEE_QUOTE_FILE="/tmp/test" # run `echo test > /tmp/test` beforehand
/// export TEE_TYPE="sgx"
/// export TEE_API_URL="http://127.0.0.1:3320"
/// ```
fn from_env() -> anyhow::Result<Self> {
let config: Self = envy::prefixed("TEE_PROVER_").from_env()?;
Ok(config)
Ok(Self {
signing_key: std::env::var("TEE_SIGNING_KEY")?.parse()?,
attestation_quote_file_path: std::env::var("TEE_QUOTE_FILE")?.parse()?,
tee_type: std::env::var("TEE_TYPE")?.parse()?,
api_url: std::env::var("TEE_API_URL")?.parse()?,
})
}
}
9 changes: 8 additions & 1 deletion core/bin/zksync_tee_prover/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ fn main() -> anyhow::Result<()> {
ObservabilityConfig::from_env().context("ObservabilityConfig::from_env()")?;

let tee_prover_config = TeeProverConfig::from_env()?;
let attestation_quote_bytes = std::fs::read(tee_prover_config.attestation_quote_file_path)?;

let prometheus_config = PrometheusConfig::from_env()?;

let mut builder = ZkStackServiceBuilder::new()?;
Expand All @@ -43,7 +45,12 @@ fn main() -> anyhow::Result<()> {

builder
.add_layer(SigintHandlerLayer)
.add_layer(TeeProverLayer::new(tee_prover_config));
.add_layer(TeeProverLayer::new(
tee_prover_config.api_url,
tee_prover_config.signing_key,
attestation_quote_bytes,
tee_prover_config.tee_type,
));

if let Some(gateway) = prometheus_config.gateway_endpoint() {
let exporter_config =
Expand Down
99 changes: 71 additions & 28 deletions core/bin/zksync_tee_prover/src/tee_prover.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt;
use std::{fmt, time::Duration};

use secp256k1::{ecdsa::Signature, Message, PublicKey, Secp256k1};
use secp256k1::{ecdsa::Signature, Message, PublicKey, Secp256k1, SecretKey};
use url::Url;
use zksync_basic_types::H256;
use zksync_node_framework::{
service::StopReceiver,
Expand All @@ -10,21 +11,32 @@ use zksync_node_framework::{
};
use zksync_prover_interface::inputs::TeeVerifierInput;
use zksync_tee_verifier::Verify;
use zksync_types::L1BatchNumber;
use zksync_types::{tee_types::TeeType, L1BatchNumber};

use crate::{
api_client::TeeApiClient, config::TeeProverConfig, error::TeeProverError, metrics::METRICS,
};
use crate::{api_client::TeeApiClient, error::TeeProverError, metrics::METRICS};

/// Wiring layer for `TeeProver`
#[derive(Debug)]
pub(crate) struct TeeProverLayer {
config: TeeProverConfig,
api_url: Url,
signing_key: SecretKey,
attestation_quote_bytes: Vec<u8>,
tee_type: TeeType,
}

impl TeeProverLayer {
pub fn new(config: TeeProverConfig) -> Self {
Self { config }
pub fn new(
api_url: Url,
signing_key: SecretKey,
attestation_quote_bytes: Vec<u8>,
tee_type: TeeType,
) -> Self {
Self {
api_url,
signing_key,
attestation_quote_bytes,
tee_type,
}
}
}

Expand All @@ -44,24 +56,34 @@ impl WiringLayer for TeeProverLayer {
}

async fn wire(self, _input: Self::Input) -> Result<Self::Output, WiringError> {
let api_url = self.config.api_url.clone();
let tee_prover = TeeProver {
config: self.config,
api_client: TeeApiClient::new(api_url),
config: Default::default(),
signing_key: self.signing_key,
public_key: self.signing_key.public_key(&Secp256k1::new()),
attestation_quote_bytes: self.attestation_quote_bytes,
tee_type: self.tee_type,
api_client: TeeApiClient::new(self.api_url),
};
Ok(LayerOutput { tee_prover })
}
}

pub(crate) struct TeeProver {
config: TeeProverConfig,
signing_key: SecretKey,
public_key: PublicKey,
attestation_quote_bytes: Vec<u8>,
tee_type: TeeType,
api_client: TeeApiClient,
}

impl fmt::Debug for TeeProver {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("TeeProver")
.field("config", &self.config)
.field("public_key", &self.public_key)
.field("attestation_quote_bytes", &self.attestation_quote_bytes)
.field("tee_type", &self.tee_type)
.finish()
}
}
Expand All @@ -79,7 +101,7 @@ impl TeeProver {
let batch_number = verification_result.batch_number;
let msg_to_sign = Message::from_slice(root_hash_bytes)
.map_err(|e| TeeProverError::Verification(e.into()))?;
let signature = self.config.signing_key.sign_ecdsa(msg_to_sign);
let signature = self.signing_key.sign_ecdsa(msg_to_sign);
observer.observe();
Ok((signature, batch_number, verification_result.value_hash))
}
Expand All @@ -89,17 +111,17 @@ impl TeeProver {
}
}

async fn step(&self, public_key: &PublicKey) -> Result<Option<L1BatchNumber>, TeeProverError> {
match self.api_client.get_job(self.config.tee_type).await? {
async fn step(&self) -> Result<Option<L1BatchNumber>, TeeProverError> {
match self.api_client.get_job(self.tee_type).await? {
Some(job) => {
let (signature, batch_number, root_hash) = self.verify(*job)?;
self.api_client
.submit_proof(
batch_number,
signature,
public_key,
&self.public_key,
root_hash,
self.config.tee_type,
self.tee_type,
)
.await?;
Ok(Some(batch_number))
Expand All @@ -112,6 +134,30 @@ impl TeeProver {
}
}

/// TEE prover configuration options.
#[derive(Debug, Clone)]
pub struct TeeProverConfig {
/// Number of retries for retriable errors before giving up on recovery (i.e., returning an error
/// from [`Self::run()`]).
pub max_retries: usize,
/// Initial back-off interval when retrying recovery on a retriable error. Each subsequent retry interval
/// will be multiplied by [`Self.retry_backoff_multiplier`].
pub initial_retry_backoff: Duration,
pub retry_backoff_multiplier: f32,
pub max_backoff: Duration,
}

impl Default for TeeProverConfig {
fn default() -> Self {
Self {
max_retries: 5,
initial_retry_backoff: Duration::from_secs(1),
retry_backoff_multiplier: 2.0,
max_backoff: Duration::from_secs(128),
}
}
}

#[async_trait::async_trait]
impl Task for TeeProver {
fn id(&self) -> TaskId {
Expand All @@ -121,27 +167,24 @@ impl Task for TeeProver {
async fn run(self: Box<Self>, mut stop_receiver: StopReceiver) -> anyhow::Result<()> {
tracing::info!("Starting the task {}", self.id());

let config = &self.config;
let attestation_quote_bytes = std::fs::read(&config.attestation_quote_file_path)?;
let public_key = config.signing_key.public_key(&Secp256k1::new());
self.api_client
.register_attestation(attestation_quote_bytes, &public_key)
.register_attestation(self.attestation_quote_bytes.clone(), &self.public_key)
.await?;

let mut retries = 1;
let mut backoff = config.initial_retry_backoff;
let mut backoff = self.config.initial_retry_backoff;
let mut observer = METRICS.job_waiting_time.start();

loop {
if *stop_receiver.0.borrow() {
tracing::info!("Stop signal received, shutting down TEE Prover component");
return Ok(());
}
let result = self.step(&public_key).await;
let result = self.step().await;
let need_to_sleep = match result {
Ok(batch_number) => {
retries = 1;
backoff = config.initial_retry_backoff;
backoff = self.config.initial_retry_backoff;
if let Some(batch_number) = batch_number {
observer.observe();
observer = METRICS.job_waiting_time.start();
Expand All @@ -155,14 +198,14 @@ impl Task for TeeProver {
}
Err(err) => {
METRICS.network_errors_counter.inc_by(1);
if !err.is_retriable() || retries > config.max_retries {
if !err.is_retriable() || retries > self.config.max_retries {
return Err(err.into());
}
tracing::warn!(%err, "Failed TEE prover step function {retries}/{}, retrying in {} milliseconds.", config.max_retries, backoff.as_millis());
tracing::warn!(%err, "Failed TEE prover step function {retries}/{}, retrying in {} milliseconds.", self.config.max_retries, backoff.as_millis());
retries += 1;
backoff = std::cmp::min(
backoff.mul_f32(config.retry_backoff_multiplier),
config.max_backoff,
backoff.mul_f32(self.config.retry_backoff_multiplier),
self.config.max_backoff,
);
true
}
Expand Down
6 changes: 1 addition & 5 deletions etc/nix/container-tee_prover.nix
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ nixsgxLib.mkSGXContainer {
log_level = "error";

env = {
TEE_PROVER_API_URL.passthrough = true;
TEE_PROVER_MAX_RETRIES.passthrough = true;
TEE_PROVER_INITIAL_RETRY_BACKOFF_SECONDS.passthrough = true;
TEE_PROVER_RETRY_BACKOFF_MULTIPLIER.passthrough = true;
TEE_PROVER_MAX_BACKOFF_SECONDS.passthrough = true;
TEE_API_URL.passthrough = true;
API_PROMETHEUS_LISTENER_PORT.passthrough = true;
API_PROMETHEUS_PUSHGATEWAY_URL.passthrough = true;
API_PROMETHEUS_PUSH_INTERVAL_MS.passthrough = true;
Expand Down

0 comments on commit eb06267

Please sign in to comment.