Skip to content

Commit

Permalink
feat: track epoch transition steps time in metrics (#6143)
Browse files Browse the repository at this point in the history
* feat: track epoch transition steps time in metrics

* feat: also track beforeProcessEpoch and afterProcessEpoch

* fix: do not need to time simple steps in processEpoch

* fix: incorporate to single metric with step label

* fix: pass metrics to processEpoch()

* chore: use separate timer for each epoch transition step

* chore: use blocks to separate timers

---------

Co-authored-by: Cayman <caymannava@gmail.com>
  • Loading branch information
twoeths and wemeetagain authored Dec 12, 2023
1 parent d7357fd commit 3a43dcb
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 16 deletions.
6 changes: 6 additions & 0 deletions packages/beacon-node/src/metrics/metrics/lodestar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,12 @@ export function createLodestarMetrics(
help: "Time to call commit after process a single epoch transition in seconds",
buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1],
}),
epochTransitionStepTime: register.histogram<"step">({
name: "lodestar_stfn_epoch_transition_step_seconds",
help: "Time to call each step of epoch transition in seconds",
labelNames: ["step"],
buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1],
}),
processBlockTime: register.histogram({
name: "lodestar_stfn_process_block_seconds",
help: "Time to process a single block in seconds",
Expand Down
69 changes: 61 additions & 8 deletions packages/state-transition/src/epoch/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
CachedBeaconStatePhase0,
EpochTransitionCache,
} from "../types.js";
import {BeaconStateTransitionMetrics} from "../metrics.js";
import {processEffectiveBalanceUpdates} from "./processEffectiveBalanceUpdates.js";
import {processEth1DataReset} from "./processEth1DataReset.js";
import {processHistoricalRootsUpdate} from "./processHistoricalRootsUpdate.js";
Expand Down Expand Up @@ -50,7 +51,12 @@ export {computeUnrealizedCheckpoints} from "./computeUnrealizedCheckpoints.js";
const maxValidatorsPerStateSlashing = SLOTS_PER_EPOCH * MAX_ATTESTER_SLASHINGS * MAX_VALIDATORS_PER_COMMITTEE;
const maxSafeValidators = Math.floor(Number.MAX_SAFE_INTEGER / MAX_EFFECTIVE_BALANCE);

export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, cache: EpochTransitionCache): void {
export function processEpoch(
fork: ForkSeq,
state: CachedBeaconStateAllForks,
cache: EpochTransitionCache,
metrics?: BeaconStateTransitionMetrics | null
): void {
// state.slashings is initially a Gwei (BigInt) vector, however since Nov 2023 it's converted to UintNum64 (number) vector in the state transition because:
// - state.slashings[nextEpoch % EPOCHS_PER_SLASHINGS_VECTOR] is reset per epoch in processSlashingsReset()
// - max slashed validators per epoch is SLOTS_PER_EPOCH * MAX_ATTESTER_SLASHINGS * MAX_VALIDATORS_PER_COMMITTEE which is 32 * 2 * 2048 = 131072 on mainnet
Expand All @@ -59,19 +65,53 @@ export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, ca
throw new Error("Lodestar does not support this network, parameters don't fit number value inside state.slashings");
}

processJustificationAndFinalization(state, cache);
{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processJustificationAndFinalization",
});
processJustificationAndFinalization(state, cache);
timer?.();
}

if (fork >= ForkSeq.altair) {
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processInactivityUpdates"});
processInactivityUpdates(state as CachedBeaconStateAltair, cache);
timer?.();
}

// processRewardsAndPenalties() is 2nd step in the specs, we optimize to do it
// after processSlashings() to update balances only once
// processRewardsAndPenalties(state, cache);
processRegistryUpdates(state, cache);
{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processRegistryUpdates"});
processRegistryUpdates(state, cache);
timer?.();
}

// accumulate slashing penalties and only update balances once in processRewardsAndPenalties()
const slashingPenalties = processSlashings(state, cache, false);
processRewardsAndPenalties(state, cache, slashingPenalties);
let slashingPenalties: number[];
{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processSlashings"});
slashingPenalties = processSlashings(state, cache, false);
timer?.();
}

{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processRewardsAndPenalties"});
processRewardsAndPenalties(state, cache, slashingPenalties);
timer?.();
}

processEth1DataReset(state, cache);
processEffectiveBalanceUpdates(state, cache);

{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processEffectiveBalanceUpdates",
});
processEffectiveBalanceUpdates(state, cache);
timer?.();
}

processSlashingsReset(state, cache);
processRandaoMixesReset(state, cache);

Expand All @@ -84,7 +124,20 @@ export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, ca
if (fork === ForkSeq.phase0) {
processParticipationRecordUpdates(state as CachedBeaconStatePhase0);
} else {
processParticipationFlagUpdates(state as CachedBeaconStateAltair);
processSyncCommitteeUpdates(state as CachedBeaconStateAltair);
{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processParticipationFlagUpdates",
});
processParticipationFlagUpdates(state as CachedBeaconStateAltair);
timer?.();
}

{
const timer = metrics?.epochTransitionStepTime.startTimer({
step: "processSyncCommitteeUpdates",
});
processSyncCommitteeUpdates(state as CachedBeaconStateAltair);
timer?.();
}
}
}
3 changes: 2 additions & 1 deletion packages/state-transition/src/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {AttesterStatus} from "./util/attesterStatus.js";
export type BeaconStateTransitionMetrics = {
epochTransitionTime: Histogram;
epochTransitionCommitTime: Histogram;
epochTransitionStepTime: Histogram<"step">;
processBlockTime: Histogram;
processBlockCommitTime: Histogram;
stateHashTreeRootTime: Histogram;
Expand All @@ -23,7 +24,7 @@ export type BeaconStateTransitionMetrics = {
type LabelValues<T extends string> = Partial<Record<T, string | number>>;

interface Histogram<T extends string = string> {
startTimer(): () => void;
startTimer(labels?: LabelValues<T>): (labels?: LabelValues<T>) => number;

observe(value: number): void;
observe(labels: LabelValues<T>, values: number): void;
Expand Down
28 changes: 21 additions & 7 deletions packages/state-transition/src/stateTransition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {toHexString} from "@chainsafe/ssz";
import {allForks, Slot, ssz} from "@lodestar/types";
import {SLOTS_PER_EPOCH} from "@lodestar/params";
import {BeaconStateTransitionMetrics, onPostStateMetrics, onStateCloneMetrics} from "./metrics.js";
import {beforeProcessEpoch, EpochTransitionCacheOpts} from "./cache/epochTransitionCache.js";
import {beforeProcessEpoch, EpochTransitionCache, EpochTransitionCacheOpts} from "./cache/epochTransitionCache.js";
import {
CachedBeaconStateAllForks,
CachedBeaconStatePhase0,
Expand Down Expand Up @@ -165,19 +165,33 @@ function processSlotsWithTransientCache(

const epochTransitionTimer = metrics?.epochTransitionTime.startTimer();

const epochTransitionCache = beforeProcessEpoch(postState, epochTransitionCacheOpts);
processEpoch(fork, postState, epochTransitionCache);
let epochTransitionCache: EpochTransitionCache;
{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "beforeProcessEpoch"});
epochTransitionCache = beforeProcessEpoch(postState, epochTransitionCacheOpts);
timer?.();
}

processEpoch(fork, postState, epochTransitionCache, metrics);

const {currentEpoch, statuses, balances} = epochTransitionCache;
metrics?.registerValidatorStatuses(currentEpoch, statuses, balances);

postState.slot++;
postState.epochCtx.afterProcessEpoch(postState, epochTransitionCache);

{
const timer = metrics?.epochTransitionStepTime.startTimer({step: "afterProcessEpoch"});
postState.epochCtx.afterProcessEpoch(postState, epochTransitionCache);
timer?.();
}

// Running commit here is not strictly necessary. The cost of running commit twice (here + after process block)
// Should be negligible but gives better metrics to differentiate the cost of it for block and epoch proc.
const epochTransitionCommitTimer = metrics?.epochTransitionCommitTime.startTimer();
postState.commit();
epochTransitionCommitTimer?.();
{
const timer = metrics?.epochTransitionCommitTime.startTimer();
postState.commit();
timer?.();
}

// Note: time only on success. Include beforeProcessEpoch, processEpoch, afterProcessEpoch, commit
epochTransitionTimer?.();
Expand Down

0 comments on commit 3a43dcb

Please sign in to comment.