Skip to content

Commit

Permalink
tune
Browse files Browse the repository at this point in the history
  • Loading branch information
msmouse committed Sep 21, 2024
1 parent c126ec4 commit 4dbfcda
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
11 changes: 7 additions & 4 deletions .github/workflows/replay-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,13 @@ jobs:
HISTORY_START: 862000000
# to see historical TXNS_TO_SKIP, check out ce6158ac2764ee9d4c8738a85f3bcdc6bd0cadc1
TXNS_TO_SKIP: "0"
# 1198522613-1203396485: https://github.com/aptos-labs/aptos-core/pull/13832
RANGES_TO_SKIP: "1198522613-1203396485"
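# 1195000000-122000000: https://github.com/aptos-labs/aptos-core/pull/13832
# NOTE(review): the range end (122000000) is smaller than its start (1195000000); possibly a dropped digit (1220000000?) — confirm.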
RANGES_TO_SKIP: "1195000000-122000000"
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 180
MAX_VERSIONS_PER_RANGE: 1800000

replay-mainnet:
if: |
Expand All @@ -97,6 +98,7 @@ jobs:
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 180
MAX_VERSIONS_PER_RANGE: 1000000

test-replay:
if: ${{ (github.event_name == 'pull_request') && contains(github.event.pull_request.labels.*.name, 'CICD:test-replay')}}
Expand All @@ -111,9 +113,10 @@ jobs:
HISTORY_START: 862000000
# to see historical TXNS_TO_SKIP, check out ce6158ac2764ee9d4c8738a85f3bcdc6bd0cadc1
TXNS_TO_SKIP: "0"
# 1198522613-1203396485: https://github.com/aptos-labs/aptos-core/pull/13832
RANGES_TO_SKIP: "1198522613-1203396485"
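# 1195000000-122000000: https://github.com/aptos-labs/aptos-core/pull/13832
# NOTE(review): the range end (122000000) is smaller than its start (1195000000); possibly a dropped digit (1220000000?) — confirm.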
RANGES_TO_SKIP: "1195000000-122000000"
BACKUP_CONFIG_TEMPLATE_PATH: terraform/helm/fullnode/files/backup/gcs.yaml
# workflow config
RUNS_ON: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES: 120 # increase test replay timeout to capture more flaky errors
MAX_VERSIONS_PER_RANGE: 1800000
12 changes: 10 additions & 2 deletions .github/workflows/workflow-run-replay-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ on:
type: number
required: true
default: 180
MAX_VERSIONS_PER_RANGE:
description: "The maximum number of versions to process in a single job."
type: number
required: true
# This allows the workflow to be triggered manually from the Github UI or CLI
# NOTE: because the "number" type is not supported, we default to 720 minute timeout
workflow_dispatch:
Expand Down Expand Up @@ -83,7 +87,10 @@ on:
type: string
required: true
default: "high-perf-docker-with-local-ssd"

MAX_VERSIONS_PER_RANGE:
description: "The maximum number of versions to process in a single job."
type: number
required: true
jobs:
prepare:
runs-on: ${{ inputs.RUNS_ON }}
Expand Down Expand Up @@ -148,6 +155,7 @@ jobs:
--command-adapter-config ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }} \
--start-version ${{ inputs.HISTORY_START }} \
--ranges-to-skip "${{ inputs.RANGES_TO_SKIP }}" \
--max-versions-per-range ${{ inputs.MAX_VERSIONS_PER_RANGE }} \
\
--max-ranges-per-job 16 \
--output-json-file jobs.json \
Expand Down Expand Up @@ -249,7 +257,7 @@ jobs:
--enable-storage-sharding \
--target-db-dir $DB \
--concurrent-downloads 8 \
--replay-concurrency-level 2 \
--replay-concurrency-level 4 \
|| res=$?
if [[ $res == 0 || $res == 2 ]]
Expand Down
18 changes: 7 additions & 11 deletions storage/db-tool/src/gen_replay_verify_jobs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@ pub struct Opt {
help = "The first transaction version required to be replayed and verified. [Defaults to 0]"
)]
start_version: Option<Version>,
#[clap(
long,
help = "Target number of transactions for each job to replay",
default_value = "1500000"
)]
target_job_size: u64,
#[clap(
long,
help = "Determines the oldest epoch to replay, relative to the latest",
Expand All @@ -50,6 +44,8 @@ pub struct Opt {
value_delimiter = ' '
)]
ranges_to_skip: Vec<String>,
#[clap(long, help = "Target number of transactions for each job to replay")]
max_versions_per_range: u64,
#[clap(long, help = "Maximum ranges per job.")]
max_ranges_per_job: u64,
#[clap(long, help = "Output json file containing the jobs.")]
Expand Down Expand Up @@ -98,7 +94,7 @@ impl Opt {
.batching(|it| {
match it.next() {
Some((end, mut begin)) => {
if end.version - begin.version >= self.target_job_size {
if end.version - begin.version >= self.max_versions_per_range {
// cut big range short, this hopefully automatically skips load tests
let msg = if end.epoch - begin.epoch > 15 {
"!!! Need more snapshots !!!"
Expand All @@ -108,21 +104,21 @@ impl Opt {
Some((
true,
begin.version,
begin.version + self.target_job_size - 1,
begin.version + self.max_versions_per_range - 1,
format!(
"Partial replay epoch {} - {}, {} txns starting from version {}, another {} versions omitted, until {}. {}",
begin.epoch,
end.epoch - 1,
self.target_job_size,
self.max_versions_per_range,
begin.version,
end.version - begin.version - self.target_job_size,
end.version - begin.version - self.max_versions_per_range,
end.version,
msg
)
))
} else {
while let Some((_prev_end, prev_begin)) = it.peek() {
if end.version - prev_begin.version > self.target_job_size {
if end.version - prev_begin.version > self.max_versions_per_range {
break;
}
begin = prev_begin;
Expand Down

0 comments on commit 4dbfcda

Please sign in to comment.