diff --git a/.changeset/ninety-points-provide.md b/.changeset/ninety-points-provide.md new file mode 100644 index 00000000..cce19457 --- /dev/null +++ b/.changeset/ninety-points-provide.md @@ -0,0 +1,6 @@ +--- +'@powersync/service-core': minor +'@powersync/service-image': minor +--- + +Report lack of commits or keepalives as issues in the diagnostics API. diff --git a/packages/service-core/src/api/diagnostics.ts b/packages/service-core/src/api/diagnostics.ts index a4c6af3c..36e2be82 100644 --- a/packages/service-core/src/api/diagnostics.ts +++ b/packages/service-core/src/api/diagnostics.ts @@ -134,6 +134,37 @@ export async function getSyncRulesStatus( }) ); + if (live_status && status?.active) { + // Check replication lag for active sync rules. + if (sync_rules.last_checkpoint_ts == null && sync_rules.last_keepalive_ts == null) { + errors.push({ + level: 'warning', + message: 'No checkpoint found, cannot calculate replication lag' + }); + } else { + const lastTime = Math.max( + sync_rules.last_checkpoint_ts?.getTime() ?? 0, + sync_rules.last_keepalive_ts?.getTime() ?? 0 + ); + const lagSeconds = Math.round((Date.now() - lastTime) / 1000); + // On idle instances, keepalive messages are only persisted every 60 seconds. + // So we use 2 minutes as a threshold for warnings, and 15 minutes for critical. + // The replication lag metric should give a more granular value, but that is not available directly + // in the API containers used for diagnostics, and this should give a good enough indication. + if (lagSeconds > 15 * 60) { + errors.push({ + level: 'fatal', + message: `No replicated commit in more than ${lagSeconds}s` + }); + } else if (lagSeconds > 120) { + errors.push({ + level: 'warning', + message: `No replicated commit in more than ${lagSeconds}s` + }); + } + } + } + return { content: include_content ? 
sync_rules.sync_rules_content : undefined, connections: [ diff --git a/packages/service-core/src/replication/AbstractReplicator.ts b/packages/service-core/src/replication/AbstractReplicator.ts index b102350d..e76debe7 100644 --- a/packages/service-core/src/replication/AbstractReplicator.ts +++ b/packages/service-core/src/replication/AbstractReplicator.ts @@ -10,8 +10,8 @@ import { AbstractReplicationJob } from './AbstractReplicationJob.js'; import { ErrorRateLimiter } from './ErrorRateLimiter.js'; import { ConnectionTestResult } from './ReplicationModule.js'; -// 5 minutes -const PING_INTERVAL = 1_000_000_000n * 300n; +// 1 minute +const PING_INTERVAL = 1_000_000_000n * 60n; export interface CreateJobOptions { lock: storage.ReplicationLock;