Skip to content

Commit

Permalink
Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
pbeza committed Oct 31, 2024
1 parent 273bba6 commit bfeddc9
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 44 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions core/lib/dal/doc/TeeProofGenerationDal.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
title: Status Diagram
---
stateDiagram-v2
[*] --> unpicked : insert_tee_proof_generation_job
unpicked --> picked_by_prover : lock_batch_for_proving
[*] --> picked_by_prover : lock
picked_by_prover --> generated : save_proof_artifacts_metadata
picked_by_prover --> unpicked : unlock_batch
picked_by_prover --> permanently_ignored : unlock_batch
picked_by_prover --> failed : unlock_batch
failed --> picked_by_prover : lock
permanently_ignored --> [*]
generated --> [*]
```
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
-- there were manually added tee_proof_generation_details with status 'permanently_ignore'
-- There were manually added tee_proof_generation_details entries with status 'permanently_ignore'.

UPDATE tee_proof_generation_details SET status = 'permanently_ignored' WHERE status = 'permanently_ignore';

-- Entries with the status 'unpicked' have not been used at all since the migration to the logic
-- introduced in https://github.com/matter-labs/zksync-era/pull/3017. Cleaning them up was
-- overlooked at the time.

DELETE FROM tee_proof_generation_details WHERE status = 'unpicked';
35 changes: 17 additions & 18 deletions core/lib/dal/src/tee_proof_generation_dal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,29 @@ pub struct TeeProofGenerationDal<'a, 'c> {

#[derive(Debug, Clone, Copy, EnumString, Display)]
pub enum TeeProofGenerationJobStatus {
#[strum(serialize = "unpicked")]
Unpicked,
#[strum(serialize = "picked_by_prover")]
PickedByProver,
#[strum(serialize = "generated")]
Generated,
#[strum(serialize = "failed")]
Failed,
#[strum(serialize = "permanently_ignored")]
PermanentlyIgnored,
}

/// Representation of a locked batch. A batch is locked when taken by a TEE prover
/// ([TeeProofGenerationJobStatus::PickedByProver]) and the TEE proof is not yet submitted. It can
/// be unlocked when the proof is submitted ([TeeProofGenerationJobStatus::Generated]) or when it is
/// considered permanently ignored ([TeeProofGenerationJobStatus::PermanentlyIgnored]), e.g., if
/// proof inputs are unavailable for an extended period.
/// Represents a batch locked by a TEE prover. A batch becomes locked when a prover takes it
/// ([TeeProofGenerationJobStatus::PickedByProver]). It can transition to one of three states:
/// 1. [TeeProofGenerationJobStatus::Generated] when the proof is successfully submitted.
/// 2. [TeeProofGenerationJobStatus::Failed] when the proof generation fails, which can happen if
/// its inputs (GCS blob files) are incomplete or the API is unavailable for an extended period.
/// 3. [TeeProofGenerationJobStatus::PermanentlyIgnored] when the proof generation has been
/// continuously failing for an extended period.
#[derive(Clone, Debug)]
pub struct LockedBatch {
/// Locked batch number.
pub l1_batch_number: L1BatchNumber,
/// The creation time of the job for this batch. It is used to determine if the batch should be
/// considered as [TeeProofGenerationJobStatus::PermanentlyIgnored].
/// The creation time of the job for this batch. It is used to determine if the batch should
/// transition to [TeeProofGenerationJobStatus::PermanentlyIgnored] or [TeeProofGenerationJobStatus::Failed].
pub created_at: DateTime<Utc>,
}

Expand Down Expand Up @@ -78,11 +80,8 @@ impl TeeProofGenerationDal<'_, '_> {
AND (
tee.l1_batch_number IS NULL
OR (
tee.status = $3
OR (
tee.status = $2
AND tee.prover_taken_at < NOW() - $4::INTERVAL
)
(tee.status = $2 OR tee.status = $3)
AND tee.prover_taken_at < NOW() - $4::INTERVAL
)
)
FETCH FIRST ROW ONLY
Expand Down Expand Up @@ -113,7 +112,7 @@ impl TeeProofGenerationDal<'_, '_> {
"#,
tee_type.to_string(),
TeeProofGenerationJobStatus::PickedByProver.to_string(),
TeeProofGenerationJobStatus::Unpicked.to_string(),
TeeProofGenerationJobStatus::Failed.to_string(),
processing_timeout,
min_batch_number
)
Expand Down Expand Up @@ -291,7 +290,7 @@ impl TeeProofGenerationDal<'_, '_> {
"#,
batch_number,
tee_type.to_string(),
TeeProofGenerationJobStatus::Unpicked.to_string(),
TeeProofGenerationJobStatus::PickedByProver.to_string(),
);
let instrumentation = Instrumented::new("insert_tee_proof_generation_job")
.with_arg("l1_batch_number", &batch_number)
Expand All @@ -306,7 +305,7 @@ impl TeeProofGenerationDal<'_, '_> {
}

/// For testing purposes only.
pub async fn get_oldest_unpicked_batch(&mut self) -> DalResult<Option<L1BatchNumber>> {
pub async fn get_oldest_picked_by_prover_batch(&mut self) -> DalResult<Option<L1BatchNumber>> {
let query = sqlx::query!(
r#"
SELECT
Expand All @@ -320,7 +319,7 @@ impl TeeProofGenerationDal<'_, '_> {
LIMIT
1
"#,
TeeProofGenerationJobStatus::Unpicked.to_string(),
TeeProofGenerationJobStatus::PickedByProver.to_string(),
);
let batch_number = Instrumented::new("get_oldest_unpicked_batch")
.with(query)
Expand Down
20 changes: 3 additions & 17 deletions core/node/proof_data_handler/src/tee_request_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ impl TeeRequestProcessor {
tracing::info!("Received request for proof generation data: {:?}", request);

let batch_ignored_timeout = ChronoDuration::days(10);
let mut min_batch_number = self.config.tee_config.first_tee_processed_batch;
let mut missing_range: Option<(L1BatchNumber, L1BatchNumber)> = None;
let min_batch_number = self.config.tee_config.first_tee_processed_batch;

let result = loop {
let Some(locked_batch) = self
Expand All @@ -72,15 +71,11 @@ impl TeeRequestProcessor {
break Ok(Some(Json(TeeProofGenerationDataResponse(Box::new(input)))));
}
Err(RequestProcessorError::ObjectStore(ObjectStoreError::KeyNotFound(_))) => {
missing_range = match missing_range {
Some((start, _)) => Some((start, batch_number)),
None => Some((batch_number, batch_number)),
};
let duration = Utc::now().signed_duration_since(locked_batch.created_at);
let status = if duration > batch_ignored_timeout {
TeeProofGenerationJobStatus::PermanentlyIgnored
} else {
TeeProofGenerationJobStatus::Unpicked
TeeProofGenerationJobStatus::Failed
};
self.unlock_batch(batch_number, request.tee_type, status)
.await?;
Expand All @@ -90,28 +85,19 @@ impl TeeRequestProcessor {
batch_number,
locked_batch.created_at
);
min_batch_number += 1;
}
Err(err) => {
self.unlock_batch(
batch_number,
request.tee_type,
TeeProofGenerationJobStatus::Unpicked,
TeeProofGenerationJobStatus::Failed,
)
.await?;
break Err(err);
}
}
};

if let Some((start, end)) = missing_range {
tracing::warn!(
"Blobs for batch numbers {} to {} not found in the object store. Marked as unpicked or permanently ignored.",
start,
end
);
}

result
}

Expand Down
6 changes: 3 additions & 3 deletions core/node/proof_data_handler/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ async fn submit_tee_proof() {
let mut proof_db_conn = db_conn_pool.connection().await.unwrap();
let oldest_batch_number = proof_db_conn
.tee_proof_generation_dal()
.get_oldest_unpicked_batch()
.get_oldest_picked_by_prover_batch()
.await
.unwrap();

Expand Down Expand Up @@ -156,7 +156,7 @@ async fn mock_tee_batch_status(

// there should not be any batches awaiting proof in the db yet

let oldest_batch_number = proof_dal.get_oldest_unpicked_batch().await.unwrap();
let oldest_batch_number = proof_dal.get_oldest_picked_by_prover_batch().await.unwrap();
assert!(oldest_batch_number.is_none());

// mock SQL table with relevant information about the status of TEE proof generation
Expand All @@ -169,7 +169,7 @@ async fn mock_tee_batch_status(
// now, there should be one batch in the db awaiting proof

let oldest_batch_number = proof_dal
.get_oldest_unpicked_batch()
.get_oldest_picked_by_prover_batch()
.await
.unwrap()
.unwrap();
Expand Down

0 comments on commit bfeddc9

Please sign in to comment.