-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(Prover CLI): `requeue` cmd (#1719)
## What ❔

<!-- What are the changes this PR brings about? -->
<!-- Example: This PR adds a PR template to the repo. -->
<!-- (For bigger PRs adding more context is appreciated) -->

Adds a `requeue` command that requeues a given batch if it's stuck. It looks at all jobs that are stuck and requeues them. A job is stuck if it has a large number of attempts and is not successful. For now, the attempts threshold is set via a command-line flag (`--max-attempts`), which defaults to 10 if not set.

### Usage Example

```
cd prover/prover_cli
zk f cargo run --release requeue --batch 1
```

## Why ❔

<!-- Why are these changes done? What goal do they contribute to? What are the principles behind them? -->
<!-- Example: PR templates ensure PR reviewers, observers, and future iterators are in context about the evolution of repos. -->

We want to be able to requeue a stuck batch with the CLI.

## Checklist

<!-- Check your PR fulfills the following items. -->
<!-- For draft PRs check the boxes as you complete them. -->

- [x] PR title corresponds to the body of PR (we generate changelog entries from PRs).
- [ ] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
- [ ] Spellcheck has been run via `zk spellcheck`.
- [x] Linkcheck has been run via `zk linkcheck`.

---------

Co-authored-by: Ivan Litteri <ivanlitteri@Ivans-MacBook-Pro.local>
Co-authored-by: Joaquin Carletti <joaquin.carletti@lambdaclass.com>
Co-authored-by: Joaquin Carletti <56092489+ColoCarletti@users.noreply.github.com>
- Loading branch information
1 parent
5f7bda7
commit f722df7
Showing
12 changed files
with
474 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
use anyhow::Context; | ||
use clap::Args as ClapArgs; | ||
use prover_dal::{ConnectionPool, Prover, ProverDal}; | ||
use zksync_types::{basic_fri_types::AggregationRound, prover_dal::StuckJobs, L1BatchNumber}; | ||
|
||
use crate::cli::ProverCLIConfig; | ||
|
||
#[derive(ClapArgs)] | ||
pub struct Args { | ||
#[clap(short, long)] | ||
batch: L1BatchNumber, | ||
/// Maximum number of attempts to re-queue a job. | ||
/// Default value is 10. | ||
/// NOTE: this argument is temporary and will be deprecated once the `config` command is implemented. | ||
#[clap(long, default_value_t = 10)] | ||
max_attempts: u32, | ||
} | ||
|
||
pub async fn run(args: Args, config: ProverCLIConfig) -> anyhow::Result<()> { | ||
let pool = ConnectionPool::<Prover>::singleton(config.db_url) | ||
.build() | ||
.await | ||
.context("failed to build a prover_connection_pool")?; | ||
|
||
let mut conn = pool | ||
.connection() | ||
.await | ||
.context("failed to acquire a connection")?; | ||
|
||
let mut fri_witness_generator_dal = conn.fri_witness_generator_dal(); | ||
|
||
let stuck_witness_input_jobs = fri_witness_generator_dal | ||
.requeue_stuck_witness_inputs_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
display_requeued_stuck_jobs(stuck_witness_input_jobs, AggregationRound::BasicCircuits); | ||
|
||
let stuck_leaf_aggregations_stuck_jobs = fri_witness_generator_dal | ||
.requeue_stuck_leaf_aggregation_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
display_requeued_stuck_jobs( | ||
stuck_leaf_aggregations_stuck_jobs, | ||
AggregationRound::LeafAggregation, | ||
); | ||
|
||
let stuck_node_aggregations_jobs = fri_witness_generator_dal | ||
.requeue_stuck_node_aggregation_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
display_requeued_stuck_jobs( | ||
stuck_node_aggregations_jobs, | ||
AggregationRound::NodeAggregation, | ||
); | ||
|
||
let stuck_recursion_tip_job = fri_witness_generator_dal | ||
.requeue_stuck_recursion_tip_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
display_requeued_stuck_jobs(stuck_recursion_tip_job, AggregationRound::RecursionTip); | ||
|
||
let stuck_scheduler_jobs = fri_witness_generator_dal | ||
.requeue_stuck_scheduler_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
display_requeued_stuck_jobs(stuck_scheduler_jobs, AggregationRound::Scheduler); | ||
|
||
let stuck_proof_compressor_jobs = conn | ||
.fri_proof_compressor_dal() | ||
.requeue_stuck_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
for stuck_job in stuck_proof_compressor_jobs { | ||
println!("Re-queuing proof compressor job {stuck_job:?} 🔁",); | ||
} | ||
|
||
let stuck_prover_jobs = conn | ||
.fri_prover_jobs_dal() | ||
.requeue_stuck_jobs_for_batch(args.batch, args.max_attempts) | ||
.await; | ||
|
||
for stuck_job in stuck_prover_jobs { | ||
println!("Re-queuing prover job {stuck_job:?} 🔁",); | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
fn display_requeued_stuck_jobs(stuck_jobs: Vec<StuckJobs>, aggregation_round: AggregationRound) { | ||
for stuck_job in stuck_jobs { | ||
println!("Re-queuing {aggregation_round} stuck job {stuck_job:?} 🔁",); | ||
} | ||
} |
41 changes: 41 additions & 0 deletions
41
...ver_dal/.sqlx/query-36375be0667ab6241a3f6432e802279dcfd0261dc58f20fb3454a4d5146a561a.json
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
10 changes: 8 additions & 2 deletions
10
...8dcbcb014b4f808c6232abd9a83354c995ac.json → ...dce6412e2725cf5162ce7a733f6dceaecb11.json
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
10 changes: 8 additions & 2 deletions
10
...fb7a093b73727f75e0cb7db9cea480c95f5c.json → ...9718349ac4fc08b455c7f4265d7443f2ec13.json
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
29 changes: 29 additions & 0 deletions
29
...ver_dal/.sqlx/query-dccb1bb8250716e8b82714c77f7998b9fa0434d590eecab8448e89be853e5352.json
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
10 changes: 8 additions & 2 deletions
10
...e88abd0f8164c2413dc83c91c29665ca645e.json → ...bae42849574731d33539bfdcca21c9b64f4e.json
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.