diff --git a/monitoring/docs/monitoring-and-telemetry.adoc b/monitoring/docs/monitoring-and-telemetry.adoc index a1a8c52ad..ead266961 100644 --- a/monitoring/docs/monitoring-and-telemetry.adoc +++ b/monitoring/docs/monitoring-and-telemetry.adoc @@ -118,6 +118,7 @@ events produced by the monitoring component are: * DKG result challenged, * large deposit revealed, * large redemption requested, +* stale redemption, * optimistic minting canceled, * optimistic minting requested too early, * optimistic minting requested for undetermined Bitcoin transaction, @@ -180,6 +181,14 @@ team’s attention. The default action is making sure that the redemption is not a result of a malicious action, and if not, that the redemption is handled correctly by the system. +==== Stale redemption + +A *warning system event* indicating that a redemption request became stale, i.e. +was not handled within the expected time. This event is sent to Sentry hub and +should get team’s attention. The default action is investigating the cause +of the extended processing time as this alert may be an early sign of +a malfunctioning wallet or may indicate a problem with the maintainer bot. 
+ ==== Optimistic minting cancelled A *warning system event* indicating that an optimistic minting request was diff --git a/monitoring/src/contracts.ts b/monitoring/src/contracts.ts index 25ce2c0fc..7e82e34e3 100644 --- a/monitoring/src/contracts.ts +++ b/monitoring/src/contracts.ts @@ -39,6 +39,11 @@ const resolve = () => { return block.number } + const blockTimestamp = async (blockNumber: number): Promise<number> => { + const block = await provider.getBlock(blockNumber) + return block.timestamp + } + // eslint-disable-next-line @typescript-eslint/no-var-requires,global-require,import/no-dynamic-require const bridgeArtifact = require(`${packageName}/artifacts/Bridge.json`) const bridge: Bridge = new EthereumBridge({ @@ -63,7 +68,7 @@ const resolve = () => { deployedAtBlockNumber: tbtcTokenArtifact.receipt.blockNumber, }) - return { bridge, tbtcVault, tbtcToken, latestBlock } + return { bridge, tbtcVault, tbtcToken, latestBlock, blockTimestamp } } export const contracts = resolve() diff --git a/monitoring/src/redemption-monitor.ts b/monitoring/src/redemption-monitor.ts index 45048cf4d..98feb60c9 100644 --- a/monitoring/src/redemption-monitor.ts +++ b/monitoring/src/redemption-monitor.ts @@ -1,13 +1,21 @@ import { BigNumber } from "ethers" +import { Hex } from "@keep-network/tbtc-v2.ts" import { context } from "./context" import { SystemEventType } from "./system-event" import { satsToRoundedBTC } from "./deposit-monitor" import { createEthTxUrl } from "./block-explorer" +import { contracts } from "./contracts" +import type { Monitor as SystemEventMonitor, SystemEvent } from "./system-event" import type { RedemptionRequestedEvent as RedemptionRequestedChainEvent } from "@keep-network/tbtc-v2.ts/dist/src/redemption" import type { Bridge } from "@keep-network/tbtc-v2.ts/dist/src/chain" -import type { Monitor as SystemEventMonitor, SystemEvent } from "./system-event" + +// The time after which a pending redemption request is considered stale. 
+// Typically, a redemption request processing time should not exceed 5 hours. +// A redemption request pending for 8 hours indicates that something is wrong. +// This value is expressed in blocks, assuming 12 seconds per block. +const redemptionRequestStaleBlocks = (8 * 60 * 60) / 12 const RedemptionRequested = ( chainEvent: RedemptionRequestedChainEvent @@ -47,6 +55,25 @@ const LargeRedemptionRequested = ( } } +const StaleRedemption = ( + chainEvent: RedemptionRequestedChainEvent +): SystemEvent => { + const ethRequestTxHashURL = createEthTxUrl(chainEvent.transactionHash) + + return { + title: "Stale redemption", + type: SystemEventType.Warning, + data: { + walletPublicKeyHash: chainEvent.walletPublicKeyHash, + redeemerOutputScript: chainEvent.redeemerOutputScript, + requestedAmountBTC: satsToRoundedBTC(chainEvent.requestedAmount), + ethRequestTxHash: chainEvent.transactionHash.toPrefixedString(), + ethRequestTxHashURL, + }, + block: chainEvent.blockNumber, + } +} + export class RedemptionMonitor implements SystemEventMonitor { private bridge: Bridge @@ -58,6 +85,22 @@ export class RedemptionMonitor implements SystemEventMonitor { // eslint-disable-next-line no-console console.log("running redemption monitor check") + const systemEvents: SystemEvent[] = [] + systemEvents.push( + ...(await this.checkRequestedRedemptions(fromBlock, toBlock)) + ) + systemEvents.push(...(await this.checkStaleRedemptions(fromBlock, toBlock))) + + // eslint-disable-next-line no-console + console.log("completed redemption monitor check") + + return systemEvents + } + + private async checkRequestedRedemptions( + fromBlock: number, + toBlock: number + ): Promise<SystemEvent[]> { const chainEvents = await this.bridge.getRedemptionRequestedEvents({ fromBlock, toBlock, @@ -80,9 +123,73 @@ export class RedemptionMonitor implements SystemEventMonitor { } } - // eslint-disable-next-line no-console - console.log("completed redemption monitor check") - return systemEvents } + + private async
checkStaleRedemptions( + fromBlock: number, + toBlock: number + ): Promise<SystemEvent[]> { + const rewindBlock = (block: number, shift: number) => + block - shift > 0 ? block - shift : 0 + + // We need to rewind the block window by the redemption request stale + // period. This way, we are looking for past redemption requests whose + // expected processing time has already elapsed. + const chainEvents = await this.bridge.getRedemptionRequestedEvents({ + fromBlock: rewindBlock(fromBlock, redemptionRequestStaleBlocks), + toBlock: rewindBlock(toBlock, redemptionRequestStaleBlocks), + }) + + const chainEventsTimestamps = await Promise.all( + chainEvents.map((ce) => contracts.blockTimestamp(ce.blockNumber)) + ) + + // To fetch pending redemption requests, we need to know the plain-text + // public keys of the wallets used by the given chain events. In order to + // achieve that, we build a cache where the key is the wallet public key + // hash and the value is the wallet plain text public key. + // + // TODO: This can be optimized by refactoring the Bridge.pendingRedemptions + // method to accept wallet public key hashes directly. 
+ const walletCache = new Map<string, string>() + // eslint-disable-next-line no-plusplus + for (let i = 0; i < chainEvents.length; i++) { + const { walletPublicKeyHash } = chainEvents[i] + + if (!walletCache.has(walletPublicKeyHash)) { + // eslint-disable-next-line no-await-in-loop + const wallet = await this.bridge.wallets(Hex.from(walletPublicKeyHash)) + walletCache.set(walletPublicKeyHash, wallet.walletPublicKey.toString()) + } + } + + const pendingRedemptionsRequests = await Promise.all( + chainEvents.map((ce) => + this.bridge.pendingRedemptions( + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + walletCache.get(ce.walletPublicKeyHash)!, + ce.redeemerOutputScript + ) + ) + ) + + return chainEvents + .filter((ce, index) => { + const pendingRedemptionRequest = pendingRedemptionsRequests[index] + const chainEventTimestamp = chainEventsTimestamps[index] + + // To consider a redemption as stale, the redemption request must be + // still pending after redemptionRequestStaleBlocks. As there can + // be multiple redemption requests for the given wallet and + // redeemer output script pair, we need to make sure the pending + // redemption request timestamp matches the timestamp of the given + // chain event emission block. + return ( + pendingRedemptionRequest.requestedAt !== 0 && + pendingRedemptionRequest.requestedAt === chainEventTimestamp + ) + }) + .map(StaleRedemption) + } }