From b97f307c755229120beaeb2ef07e7e9c1311517f Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:11:15 +0300 Subject: [PATCH 01/24] feat(errors): Add specialized timeout error types for maintenance scenarios - Added `SocketTimeoutDuringMaintananceError`, a subclass of `TimeoutError`, to handle socket timeouts during maintenance. - Added `CommandTimeoutDuringMaintananceError`, another subclass of `TimeoutError`, to address command write timeouts during maintenance. --- packages/client/lib/errors.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/client/lib/errors.ts b/packages/client/lib/errors.ts index 5cb9166df0..ae4d598abd 100644 --- a/packages/client/lib/errors.ts +++ b/packages/client/lib/errors.ts @@ -71,6 +71,18 @@ export class BlobError extends ErrorReply {} export class TimeoutError extends Error {} +export class SocketTimeoutDuringMaintananceError extends TimeoutError { + constructor(timeout: number) { + super(`Socket timeout during maintenance. Expecting data, but didn't receive any in ${timeout}ms.`); + } +} + +export class CommandTimeoutDuringMaintananceError extends TimeoutError { + constructor(timeout: number) { + super(`Command timeout during maintenance. Waited to write command for more than ${timeout}ms.`); + } +} + export class MultiErrorReply extends ErrorReply { replies: Array; errorIndexes: Array; From 45c79f7d7d6d856435d5f6a82efb27e731812c55 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:12:35 +0300 Subject: [PATCH 02/24] feat(linked-list): Add EmptyAwareSinglyLinkedList and enhance DoublyLinkedList functionality - Introduced `EmptyAwareSinglyLinkedList`, a subclass of `SinglyLinkedList` that emits an `empty` event when the list becomes empty due to `reset`, `shift`, or `remove` operations. - Added `nodes()` iterator method to `DoublyLinkedList` for iterating over nodes directly. 
- Enhanced unit tests for `DoublyLinkedList` and `SinglyLinkedList` to cover edge cases and new functionality. - Added comprehensive tests for `EmptyAwareSinglyLinkedList` to validate `empty` event emission under various scenarios. - Improved code formatting and consistency. --- .../client/lib/client/linked-list.spec.ts | 111 ++++++++++++++---- packages/client/lib/client/linked-list.ts | 41 ++++++- 2 files changed, 124 insertions(+), 28 deletions(-) diff --git a/packages/client/lib/client/linked-list.spec.ts b/packages/client/lib/client/linked-list.spec.ts index 9547fb81c7..c791d21900 100644 --- a/packages/client/lib/client/linked-list.spec.ts +++ b/packages/client/lib/client/linked-list.spec.ts @@ -1,138 +1,197 @@ -import { SinglyLinkedList, DoublyLinkedList } from './linked-list'; -import { equal, deepEqual } from 'assert/strict'; - -describe('DoublyLinkedList', () => { +import { + SinglyLinkedList, + DoublyLinkedList, + EmptyAwareSinglyLinkedList, +} from "./linked-list"; +import { equal, deepEqual } from "assert/strict"; + +describe("DoublyLinkedList", () => { const list = new DoublyLinkedList(); - it('should start empty', () => { + it("should start empty", () => { equal(list.length, 0); equal(list.head, undefined); equal(list.tail, undefined); deepEqual(Array.from(list), []); }); - it('shift empty', () => { + it("shift empty", () => { equal(list.shift(), undefined); equal(list.length, 0); deepEqual(Array.from(list), []); }); - it('push 1', () => { + it("push 1", () => { list.push(1); equal(list.length, 1); deepEqual(Array.from(list), [1]); }); - it('push 2', () => { + it("push 2", () => { list.push(2); equal(list.length, 2); deepEqual(Array.from(list), [1, 2]); }); - it('unshift 0', () => { + it("unshift 0", () => { list.unshift(0); equal(list.length, 3); deepEqual(Array.from(list), [0, 1, 2]); }); - it('remove middle node', () => { + it("remove middle node", () => { list.remove(list.head!.next!); equal(list.length, 2); deepEqual(Array.from(list), [0, 2]); }); 
- it('remove head', () => { + it("remove head", () => { list.remove(list.head!); equal(list.length, 1); deepEqual(Array.from(list), [2]); }); - it('remove tail', () => { + it("remove tail", () => { list.remove(list.tail!); equal(list.length, 0); deepEqual(Array.from(list), []); }); - it('unshift empty queue', () => { + it("unshift empty queue", () => { list.unshift(0); equal(list.length, 1); deepEqual(Array.from(list), [0]); }); - it('push 1', () => { + it("push 1", () => { list.push(1); equal(list.length, 2); deepEqual(Array.from(list), [0, 1]); }); - it('shift', () => { + it("shift", () => { equal(list.shift(), 0); equal(list.length, 1); deepEqual(Array.from(list), [1]); }); - it('shift last element', () => { + it("shift last element", () => { equal(list.shift(), 1); equal(list.length, 0); deepEqual(Array.from(list), []); }); + + it("provide forEach for nodes", () => { + list.reset(); + list.push(1); + list.push(2); + list.push(3); + let count = 0; + for(const _ of list.nodes()) { + count++; + } + equal(count, 3); + for(const _ of list.nodes()) { + count++; + } + equal(count, 6); + }); }); -describe('SinglyLinkedList', () => { +describe("SinglyLinkedList", () => { const list = new SinglyLinkedList(); - it('should start empty', () => { + it("should start empty", () => { equal(list.length, 0); equal(list.head, undefined); equal(list.tail, undefined); deepEqual(Array.from(list), []); }); - it('shift empty', () => { + it("shift empty", () => { equal(list.shift(), undefined); equal(list.length, 0); deepEqual(Array.from(list), []); }); - it('push 1', () => { + it("push 1", () => { list.push(1); equal(list.length, 1); deepEqual(Array.from(list), [1]); }); - it('push 2', () => { + it("push 2", () => { list.push(2); equal(list.length, 2); deepEqual(Array.from(list), [1, 2]); }); - it('push 3', () => { + it("push 3", () => { list.push(3); equal(list.length, 3); deepEqual(Array.from(list), [1, 2, 3]); }); - it('shift 1', () => { + it("shift 1", () => { equal(list.shift(), 
1); equal(list.length, 2); deepEqual(Array.from(list), [2, 3]); }); - it('shift 2', () => { + it("shift 2", () => { equal(list.shift(), 2); equal(list.length, 1); deepEqual(Array.from(list), [3]); }); - it('shift 3', () => { + it("shift 3", () => { equal(list.shift(), 3); equal(list.length, 0); deepEqual(Array.from(list), []); }); - it('should be empty', () => { + it("should be empty", () => { equal(list.length, 0); equal(list.head, undefined); equal(list.tail, undefined); }); }); + +describe("EmptyAwareSinglyLinkedList", () => { + it("should emit 'empty' event when reset", () => { + const list = new EmptyAwareSinglyLinkedList(); + let count = 0; + list.events.on("empty", () => count++); + list.push(1); + list.reset(); + equal(count, 1); + list.reset(); + equal(count, 1); + }); + + it("should emit 'empty' event when shift makes the list empty", () => { + const list = new EmptyAwareSinglyLinkedList(); + let count = 0; + list.events.on("empty", () => count++); + list.push(1); + list.push(2); + list.shift(); + equal(count, 0); + list.shift(); + equal(count, 1); + list.shift(); + equal(count, 1); + }); + + it("should emit 'empty' event when remove makes the list empty", () => { + const list = new EmptyAwareSinglyLinkedList(); + let count = 0; + list.events.on("empty", () => count++); + const node1 = list.push(1); + const node2 = list.push(2); + list.remove(node1, undefined); + equal(count, 0); + list.remove(node2, undefined); + equal(count, 1); + }); +}); diff --git a/packages/client/lib/client/linked-list.ts b/packages/client/lib/client/linked-list.ts index 29678f027b..461f1d4082 100644 --- a/packages/client/lib/client/linked-list.ts +++ b/packages/client/lib/client/linked-list.ts @@ -1,3 +1,5 @@ +import EventEmitter from "events"; + export interface DoublyLinkedNode { value: T; previous: DoublyLinkedNode | undefined; @@ -32,7 +34,7 @@ export class DoublyLinkedList { next: undefined, value }; - } + } return this.#tail = this.#tail.next = { previous: this.#tail, @@ 
-93,7 +95,7 @@ export class DoublyLinkedList { node.previous!.next = node.next; node.previous = undefined; } - + node.next = undefined; } @@ -109,6 +111,14 @@ export class DoublyLinkedList { node = node.next; } } + + *nodes() { + let node = this.#head; + while(node) { + yield node; + node = node.next; + } + } } export interface SinglyLinkedNode { @@ -201,3 +211,30 @@ export class SinglyLinkedList { } } } + +export class EmptyAwareSinglyLinkedList extends SinglyLinkedList { + readonly events = new EventEmitter(); + reset() { + const old = this.length; + super.reset(); + if(old !== this.length && this.length === 0) { + this.events.emit('empty'); + } + } + shift(): T | undefined { + const old = this.length; + const ret = super.shift(); + if(old !== this.length && this.length === 0) { + this.events.emit('empty'); + } + return ret; + } + remove(node: SinglyLinkedNode, parent: SinglyLinkedNode | undefined) { + const old = this.length; + super.remove(node, parent); + if(old !== this.length && this.length === 0) { + this.events.emit('empty'); + } + } + +} From f20d903dc500e7d04d5e98d04c359e6d3d0fd3d8 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:21:52 +0300 Subject: [PATCH 03/24] refactor(commands-queue): Improve push notification handling - Replaced `setInvalidateCallback` with a more flexible `addPushHandler` method, allowing multiple handlers for push notifications. - Introduced the `PushHandler` type to standardize push notification processing. - Refactored `RedisCommandsQueue` to use a `#pushHandlers` array, enabling dynamic and modular handling of push notifications. - Updated `RedisClient` to leverage the new handler mechanism for `invalidate` push notifications, simplifying and decoupling logic. 
--- packages/client/lib/client/commands-queue.ts | 35 ++++++++------------ packages/client/lib/client/index.ts | 14 +++++++- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index 52a07a7e3b..dd8f9ebe3a 100644 --- a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -50,6 +50,13 @@ const RESP2_PUSH_TYPE_MAPPING = { [RESP_TYPES.SIMPLE_STRING]: Buffer }; +// Try to handle a push notification. Return whether you +// successfully consumed the notification or not. This is +// important in order for the queue to be able to pass the +// notification to another handler if the current one did not +// succeed. +type PushHandler = (pushItems: Array) => boolean; + export default class RedisCommandsQueue { readonly #respVersion; readonly #maxLength; @@ -60,12 +67,11 @@ export default class RedisCommandsQueue { readonly decoder; readonly #pubSub = new PubSub(); + #pushHandlers: PushHandler[] = [this.#onPush.bind(this)]; get isPubSubActive() { return this.#pubSub.isActive; } - #invalidateCallback?: (key: RedisArgument | null) => unknown; - constructor( respVersion: RespVersions, maxLength: number | null | undefined, @@ -107,6 +113,7 @@ export default class RedisCommandsQueue { } return true; } + return false } #getTypeMapping() { @@ -119,30 +126,16 @@ export default class RedisCommandsQueue { onErrorReply: err => this.#onErrorReply(err), //TODO: we can shave off a few cycles by not adding onPush handler at all if CSC is not used onPush: push => { - if (!this.#onPush(push)) { - // currently only supporting "invalidate" over RESP3 push messages - switch (push[0].toString()) { - case "invalidate": { - if (this.#invalidateCallback) { - if (push[1] !== null) { - for (const key of push[1]) { - this.#invalidateCallback(key); - } - } else { - this.#invalidateCallback(null); - } - } - break; - } - } + for(const pushHandler of 
this.#pushHandlers) { + if(pushHandler(push)) return } }, getTypeMapping: () => this.#getTypeMapping() }); } - setInvalidateCallback(callback?: (key: RedisArgument | null) => unknown) { - this.#invalidateCallback = callback; + addPushHandler(handler: PushHandler): void { + this.#pushHandlers.push(handler); } addCommand( @@ -432,7 +425,7 @@ export default class RedisCommandsQueue { } static #removeTimeoutListener(command: CommandToWrite) { - command.timeout!.signal.removeEventListener('abort', command.timeout!.listener); + command.timeout?.signal.removeEventListener('abort', command.timeout!.listener); } static #flushToWrite(toBeSent: CommandToWrite, err: Error) { diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 57b1231670..927a7ba766 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -464,7 +464,19 @@ export default class RedisClient< const cscConfig = options.clientSideCache; this.#clientSideCache = new BasicClientSideCache(cscConfig); } - this.#queue.setInvalidateCallback(this.#clientSideCache.invalidate.bind(this.#clientSideCache)); + this.#queue.addPushHandler((push: Array): boolean => { + if (push[0].toString() !== 'invalidate') return false; + + if (push[1] !== null) { + for (const key of push[1]) { + this.#clientSideCache?.invalidate(key) + } + } else { + this.#clientSideCache?.invalidate(null) + } + + return true + }); } } From 160a0c6323032d8a3d36a75380a09b18c1452cbf Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:25:52 +0300 Subject: [PATCH 04/24] feat(commands-queue): Add method to wait for in-flight commands to complete - Introduced `waitForInflightCommandsToComplete` method to asynchronously wait for all in-flight commands to finish processing. - Utilized the `empty` event from `#waitingForReply` to signal when all commands have been completed. 
--- packages/client/lib/client/commands-queue.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index dd8f9ebe3a..a2016623e6 100644 --- a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -138,6 +138,17 @@ export default class RedisCommandsQueue { this.#pushHandlers.push(handler); } + async waitForInflightCommandsToComplete(): Promise { + // In-flight commands already completed + if(this.#waitingForReply.length === 0) { + return + }; + // Otherwise wait for in-flight commands to fire `empty` event + return new Promise(resolve => { + this.#waitingForReply.events.on('empty', resolve) + }); + } + addCommand( args: ReadonlyArray, options?: CommandOptions From 20ab341b17a77aa01ea32130b28613e01f4efb91 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:28:55 +0300 Subject: [PATCH 05/24] feat(commands-queue): Introduce maintenance mode support for commands-queue - Added `#inMaintenance` property and `set inMaintenance` setter to track maintenance mode state. - Added `#maintenanceCommandTimeout` and `setMaintenanceCommandTimeout` method to dynamically adjust command timeouts during maintenance. - Prioritized `#maintenanceCommandTimeout` over individual command timeouts. - Ensured `CommandTimeoutDuringMaintananceError` is used when in maintenance mode. 
--- packages/client/lib/client/commands-queue.ts | 62 ++++++++++++++++++-- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index a2016623e6..0f9341b484 100644 --- a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -1,9 +1,9 @@ -import { SinglyLinkedList, DoublyLinkedNode, DoublyLinkedList } from './linked-list'; +import { DoublyLinkedNode, DoublyLinkedList, EmptyAwareSinglyLinkedList } from './linked-list'; import encodeCommand from '../RESP/encoder'; import { Decoder, PUSH_TYPE_MAPPING, RESP_TYPES } from '../RESP/decoder'; import { TypeMapping, ReplyUnion, RespVersions, RedisArgument } from '../RESP/types'; import { ChannelListeners, PubSub, PubSubCommand, PubSubListener, PubSubType, PubSubTypeListeners } from './pub-sub'; -import { AbortError, ErrorReply, TimeoutError } from '../errors'; +import { AbortError, ErrorReply, CommandTimeoutDuringMaintananceError, TimeoutError } from '../errors'; import { MonitorCallback } from '.'; export interface CommandOptions { @@ -30,6 +30,7 @@ export interface CommandToWrite extends CommandWaitingForReply { timeout: { signal: AbortSignal; listener: () => unknown; + originalTimeout: number | undefined; } | undefined; } @@ -61,13 +62,58 @@ export default class RedisCommandsQueue { readonly #respVersion; readonly #maxLength; readonly #toWrite = new DoublyLinkedList(); - readonly #waitingForReply = new SinglyLinkedList(); + readonly #waitingForReply = new EmptyAwareSinglyLinkedList(); readonly #onShardedChannelMoved; #chainInExecution: symbol | undefined; readonly decoder; readonly #pubSub = new PubSub(); #pushHandlers: PushHandler[] = [this.#onPush.bind(this)]; + + #inMaintenance = false; + + set inMaintenance(value: boolean) { + this.#inMaintenance = value; + } + + #maintenanceCommandTimeout: number | undefined + + setMaintenanceCommandTimeout(ms: number | undefined) { + // 
Prevent possible api misuse + if (this.#maintenanceCommandTimeout === ms) return; + + this.#maintenanceCommandTimeout = ms; + + let counter = 0; + + // Overwrite timeouts of all eligible toWrite commands + for(const node of this.#toWrite.nodes()) { + const command = node.value; + + // Remove timeout listener if it exists + RedisCommandsQueue.#removeTimeoutListener(command) + + // Determine newTimeout + const newTimeout = this.#maintenanceCommandTimeout ?? command.timeout?.originalTimeout; + // if no timeout is given and the command didnt have any timeout before, skip + if (!newTimeout) return; + + counter++; + + // Overwrite the command's timeout + const signal = AbortSignal.timeout(newTimeout); + command.timeout = { + signal, + listener: () => { + this.#toWrite.remove(node); + command.reject(this.#inMaintenance ? new CommandTimeoutDuringMaintananceError(newTimeout) : new TimeoutError()); + }, + originalTimeout: command.timeout?.originalTimeout + }; + signal.addEventListener('abort', command.timeout.listener, { once: true }); + }; + } + get isPubSubActive() { return this.#pubSub.isActive; } @@ -172,15 +218,19 @@ export default class RedisCommandsQueue { typeMapping: options?.typeMapping }; - const timeout = options?.timeout; + // If #maintenanceCommandTimeout was explicitly set, we should + // use it instead of the timeout provided by the command + const timeout = this.#maintenanceCommandTimeout || options?.timeout if (timeout) { + const signal = AbortSignal.timeout(timeout); value.timeout = { signal, listener: () => { this.#toWrite.remove(node); - value.reject(new TimeoutError()); - } + value.reject(this.#inMaintenance ? 
new CommandTimeoutDuringMaintananceError(timeout) : new TimeoutError()); + }, + originalTimeout: options?.timeout }; signal.addEventListener('abort', value.timeout.listener, { once: true }); } From e70c12b535ad642fecc67d1e745a83cc9c523bf0 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:33:26 +0300 Subject: [PATCH 06/24] refactor(client): Extract socket event listener setup into helper method --- packages/client/lib/client/index.ts | 59 ++++++++++++++++------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 927a7ba766..e52bd5e828 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -694,6 +694,35 @@ export default class RedisClient< return commands; } + #attachListeners(socket: RedisSocket) { + socket.on('data', chunk => { + try { + this.#queue.decoder.write(chunk); + } catch (err) { + this.#queue.resetDecoder(); + this.emit('error', err); + } + }) + .on('error', err => { + this.emit('error', err); + this.#clientSideCache?.onError(); + if (this.#socket.isOpen && !this.#options?.disableOfflineQueue) { + this.#queue.flushWaitingForReply(err); + } else { + this.#queue.flushAll(err); + } + }) + .on('connect', () => this.emit('connect')) + .on('ready', () => { + this.emit('ready'); + this.#setPingTimer(); + this.#maybeScheduleWrite(); + }) + .on('reconnecting', () => this.emit('reconnecting')) + .on('drain', () => this.#maybeScheduleWrite()) + .on('end', () => this.emit('end')); + } + #initiateSocket(): RedisSocket { const socketInitiator = async () => { const promises = [], @@ -725,33 +754,9 @@ export default class RedisClient< } }; - return new RedisSocket(socketInitiator, this.#options?.socket) - .on('data', chunk => { - try { - this.#queue.decoder.write(chunk); - } catch (err) { - this.#queue.resetDecoder(); - this.emit('error', err); - } - }) - .on('error', err => { - this.emit('error', err); - 
this.#clientSideCache?.onError(); - if (this.#socket.isOpen && !this.#options?.disableOfflineQueue) { - this.#queue.flushWaitingForReply(err); - } else { - this.#queue.flushAll(err); - } - }) - .on('connect', () => this.emit('connect')) - .on('ready', () => { - this.emit('ready'); - this.#setPingTimer(); - this.#maybeScheduleWrite(); - }) - .on('reconnecting', () => this.emit('reconnecting')) - .on('drain', () => this.#maybeScheduleWrite()) - .on('end', () => this.emit('end')); + const socket = new RedisSocket(socketInitiator, this.#options?.socket); + this.#attachListeners(socket); + return socket; } #pingTimer?: NodeJS.Timeout; From 98d5c1323f4346da9856eb98f5acdc87b5b54e22 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:55:12 +0300 Subject: [PATCH 07/24] refactor(socket): Add maintenance mode support and dynamic timeout handling - Introduced `#inMaintenance` property and setter to track maintenance mode state in `RedisSocket`. - Added `#maintenanceTimeout` and `setMaintenanceTimeout` method to dynamically adjust socket timeouts during maintenance. - Enhanced timeout error handling to differentiate between regular timeouts (`SocketTimeoutError`) and maintenance-specific timeouts (`SocketTimeoutDuringMaintananceError`). 
--- packages/client/lib/client/socket.ts | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/packages/client/lib/client/socket.ts b/packages/client/lib/client/socket.ts index 5f0bcc4492..5193e9d8a3 100644 --- a/packages/client/lib/client/socket.ts +++ b/packages/client/lib/client/socket.ts @@ -1,7 +1,7 @@ import { EventEmitter, once } from 'node:events'; import net from 'node:net'; import tls from 'node:tls'; -import { ConnectionTimeoutError, ClientClosedError, SocketClosedUnexpectedlyError, ReconnectStrategyError, SocketTimeoutError } from '../errors'; +import { ConnectionTimeoutError, ClientClosedError, SocketClosedUnexpectedlyError, ReconnectStrategyError, SocketTimeoutError, SocketTimeoutDuringMaintananceError } from '../errors'; import { setTimeout } from 'node:timers/promises'; import { RedisArgument } from '../RESP/types'; @@ -60,6 +60,8 @@ export default class RedisSocket extends EventEmitter { readonly #socketFactory; readonly #socketTimeout; + #maintenanceTimeout: number | undefined; + #socket?: net.Socket | tls.TLSSocket; #isOpen = false; @@ -82,6 +84,12 @@ export default class RedisSocket extends EventEmitter { return this.#socketEpoch; } + #inMaintenance = false; + + set inMaintenance(value: boolean) { + this.#inMaintenance = value; + } + constructor(initiator: RedisSocketInitiator, options?: RedisSocketOptions) { super(); @@ -238,6 +246,18 @@ export default class RedisSocket extends EventEmitter { } while (this.#isOpen && !this.#isReady); } + setMaintenanceTimeout(ms?: number) { + if (this.#maintenanceTimeout === ms) return; + + this.#maintenanceTimeout = ms; + + if(ms !== undefined) { + this.#socket?.setTimeout(ms); + } else { + this.#socket?.setTimeout(this.#socketTimeout ?? 
0); + } + } + async #createSocket(): Promise { const socket = this.#socketFactory.create(); @@ -260,7 +280,10 @@ export default class RedisSocket extends EventEmitter { if (this.#socketTimeout) { socket.once('timeout', () => { - socket.destroy(new SocketTimeoutError(this.#socketTimeout!)); + const error = this.#inMaintenance + ? new SocketTimeoutDuringMaintananceError(this.#socketTimeout!) + : new SocketTimeoutError(this.#socketTimeout!) + socket.destroy(error); }); socket.setTimeout(this.#socketTimeout); } From 6ef87c95a362586e0e8f691ed02b1e476e8576d1 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 14:58:33 +0300 Subject: [PATCH 08/24] feat(client): Add Redis Enterprise maintenance configuration options - Introduced `maintPushNotifications` option to control how the client handles Redis Enterprise maintenance push notifications (`disabled`, `enabled`, `auto`). - Added `maintMovingEndpointType` option to specify the endpoint type for reconnecting during a MOVING notification (`auto`, `internal-ip`, `external-ip`, etc.). - Added `maintRelaxedCommandTimeout` option to define a relaxed timeout for commands during maintenance. - Added `maintRelaxedSocketTimeout` option to define a relaxed timeout for the socket during maintenance. - Enforced RESP3 requirement for maintenance-related features (`maintPushNotifications`). --- packages/client/lib/client/index.ts | 46 ++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index e52bd5e828..9c10551d8e 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -144,7 +144,46 @@ export interface RedisClientOptions< * Tag to append to library name that is sent to the Redis server */ clientInfoTag?: string; -} + /** + * Controls how the client handles Redis Enterprise maintenance push notifications. + * + * - `disabled`: The feature is not used by the client. 
+ * - `enabled`: The client attempts to enable the feature on the server. If the server responds with an error, the connection is interrupted. + * - `auto`: The client attempts to enable the feature on the server. If the server returns an error, the client disables the feature and continues. + * + * The default is `auto`. + */ + maintPushNotifications?: 'disabled' | 'enabled' | 'auto'; + /** + * Controls how the client requests the endpoint to reconnect to during a MOVING notification in Redis Enterprise maintenance. + * + * - `auto`: If the connection is opened to a name or IP address that is from/resolves to a reserved private IP range, request an internal endpoint (e.g., internal-ip), otherwise an external one. If TLS is enabled, then request a FQDN. + * - `internal-ip`: Enforce requesting the internal IP. + * - `internal-fqdn`: Enforce requesting the internal FQDN. + * - `external-ip`: Enforce requesting the external IP address. + * - `external-fqdn`: Enforce requesting the external FQDN. + * - `none`: Used to request a null endpoint, which tells the client to reconnect based on its current config + + * The default is `auto`. + */ + maintMovingEndpointType?: MovingEndpointType; + /** + * Specifies a more relaxed timeout (in milliseconds) for commands during a maintenance window. + * This helps minimize command timeouts during maintenance. If not provided, the `commandOptions.timeout` + * will be used instead. Timeouts during maintenance period result in a `CommandTimeoutDuringMaintanance` error. + * + * The default is 10000 + */ + maintRelaxedCommandTimeout?: number; + /** + * Specifies a more relaxed timeout (in milliseconds) for the socket during a maintenance window. + * This helps minimize socket timeouts during maintenance. If not provided, the `socket.timeout` + * will be used instead. Timeouts during maintenance period result in a `SocketTimeoutDuringMaintanance` error. 
+ * + * The default is 10000 + */ + maintRelaxedSocketTimeout?: number; +}; export type WithCommands< RESP extends RespVersions, @@ -485,7 +524,12 @@ export default class RedisClient< throw new Error('Client Side Caching is only supported with RESP3'); } + if (options?.maintPushNotifications && options?.maintPushNotifications !== 'disabled' && options?.RESP !== 3) { + throw new Error('Graceful Maintenance is only supported with RESP3'); + } + } + #initiateOptions(options?: RedisClientOptions): RedisClientOptions | undefined { // Convert username/password to credentialsProvider if no credentialsProvider is already in place From 373a2289b9c3f3eb4a37c539caa8ee121f136f00 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 14 Aug 2025 15:09:15 +0300 Subject: [PATCH 09/24] feat(client): Add socket helpers and pause mechanism - Introduced `#paused` flag with corresponding `_pause` and `_unpause` methods to temporarily halt writing commands to the socket during maintenance windows. - Updated `#write` method to respect the `#paused` flag, preventing new commands from being written during maintenance. 
- Added `_ejectSocket` method to safely detach from and return the current socket - Added `_insertSocket` method to receive and start using a new socket --- packages/client/lib/client/index.ts | 48 +++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 9c10551d8e..6c3f81187e 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -429,7 +429,7 @@ export default class RedisClient< } readonly #options?: RedisClientOptions; - readonly #socket: RedisSocket; + #socket: RedisSocket; readonly #queue: RedisCommandsQueue; #selectedDB = 0; #monitorCallback?: MonitorCallback; @@ -442,11 +442,16 @@ export default class RedisClient< #watchEpoch?: number; #clientSideCache?: ClientSideCacheProvider; #credentialsSubscription: Disposable | null = null; + // Flag used to pause writing to the socket during maintenance windows. + // When true, prevents new commands from being written while waiting for: + // 1. New socket to be ready after maintenance redirect + // 2. 
In-flight commands on the old socket to complete + #paused = false; + get clientSideCache() { return this._self.#clientSideCache; } - get options(): RedisClientOptions | undefined { return this._self.#options; } @@ -912,6 +917,42 @@ export default class RedisClient< return this as unknown as RedisClientType; } + /** + * @internal + */ + _ejectSocket(): RedisSocket { + const socket = this._self.#socket; + // @ts-ignore + this.#socket = null; + socket.removeAllListeners(); + return socket; + } + + /** + * @intenal + */ + _insertSocket(socket: RedisSocket) { + if(this._self.#socket) { + this._self._ejectSocket().destroy(); + } + this._self.#socket = socket; + this._self.#attachListeners(this._self.#socket); + } + + /** + * @internal + */ + _pause() { + this._self.#paused = true; + } + + /** + * @internal + */ + _unpause() { + this._self.#paused = false; + } + /** * @internal */ @@ -1141,6 +1182,9 @@ export default class RedisClient< } #write() { + if(this.#paused) { + return + } this.#socket.write(this.#queue.commandsToWrite()); } From 5d745b434f887d6351485e36b8fabb7668da2916 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Thu, 28 Aug 2025 11:09:42 +0300 Subject: [PATCH 10/24] feat(client): Add Redis Enterprise maintenance handling capabilities - Introduced `EnterpriseMaintenanceManager` to manage Redis Enterprise maintenance events and push notifications. - Integrated `EnterpriseMaintenanceManager` into `RedisClient` to handle maintenance push notifications and manage socket transitions. - Implemented graceful handling of MOVING, MIGRATING, and FAILOVER push notifications, including socket replacement and timeout adjustments. 
--- packages/client/lib/client/commands-queue.ts | 3 + .../client/enterprise-maintenance-manager.ts | 309 ++++++++++++++++++ packages/client/lib/client/index.ts | 30 +- packages/client/lib/client/socket.ts | 2 + 4 files changed, 341 insertions(+), 3 deletions(-) create mode 100644 packages/client/lib/client/enterprise-maintenance-manager.ts diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index 0f9341b484..ef0db6ad57 100644 --- a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -5,6 +5,7 @@ import { TypeMapping, ReplyUnion, RespVersions, RedisArgument } from '../RESP/ty import { ChannelListeners, PubSub, PubSubCommand, PubSubListener, PubSubType, PubSubTypeListeners } from './pub-sub'; import { AbortError, ErrorReply, CommandTimeoutDuringMaintananceError, TimeoutError } from '../errors'; import { MonitorCallback } from '.'; +import { dbgMaintenance } from './enterprise-maintenance-manager'; export interface CommandOptions { chainId?: symbol; @@ -79,6 +80,7 @@ export default class RedisCommandsQueue { #maintenanceCommandTimeout: number | undefined setMaintenanceCommandTimeout(ms: number | undefined) { + dbgMaintenance(`Setting maintenance command timeout to ${ms}`); // Prevent possible api misuse if (this.#maintenanceCommandTimeout === ms) return; @@ -112,6 +114,7 @@ export default class RedisCommandsQueue { }; signal.addEventListener('abort', command.timeout.listener, { once: true }); }; + dbgMaintenance(`Total of ${counter} timeouts reset to ${ms}`); } get isPubSubActive() { diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts new file mode 100644 index 0000000000..60eaea5057 --- /dev/null +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -0,0 +1,309 @@ +import { RedisClientOptions } from "."; +import RedisCommandsQueue from "./commands-queue"; +import { 
RedisArgument } from "../.."; +import { isIP } from "net"; +import { lookup } from "dns/promises"; +import assert from "node:assert"; +import { setTimeout } from "node:timers/promises"; +import RedisSocket from "./socket"; + +export const MAINTENANCE_EVENTS = { + PAUSE_WRITING: "pause-writing", + RESUME_WRITING: "resume-writing", + TIMEOUTS_UPDATE: "timeouts-update", +} as const; + +const PN = { + MOVING: "MOVING", + MIGRATING: "MIGRATING", + MIGRATED: "MIGRATED", + FAILING_OVER: "FAILING_OVER", + FAILED_OVER: "FAILED_OVER", +}; + +export const dbgMaintenance = (...args: any[]) => { + if (!process.env.DEBUG_MAINTENANCE) return; + return console.log("[MNT]", ...args); +}; + +export interface MaintenanceUpdate { + inMaintenance: boolean; + relaxedCommandTimeout?: number; + relaxedSocketTimeout?: number; +} + +interface Client { + _ejectSocket: () => RedisSocket; + _insertSocket: (socket: RedisSocket) => void; + _pause: () => void; + _unpause: () => void; + _maintenanceUpdate: (update: MaintenanceUpdate) => void; + duplicate: (options: RedisClientOptions) => Client; + connect: () => Promise; + destroy: () => void; +} + +export default class EnterpriseMaintenanceManager { + #commandsQueue: RedisCommandsQueue; + #options: RedisClientOptions; + #isMaintenance = 0; + #client: Client; + + static setupDefaultMaintOptions(options: RedisClientOptions) { + if (options.maintPushNotifications === undefined) { + options.maintPushNotifications = + options?.RESP === 3 ? 
"auto" : "disabled"; + } + if (options.maintMovingEndpointType === undefined) { + options.maintMovingEndpointType = "auto"; + } + if (options.maintRelaxedSocketTimeout === undefined) { + options.maintRelaxedSocketTimeout = 10000; + } + if (options.maintRelaxedCommandTimeout === undefined) { + options.maintRelaxedCommandTimeout = 10000; + } + } + + static async getHandshakeCommand( + tls: boolean, + host: string, + options: RedisClientOptions, + ): Promise< + | { cmd: Array; errorHandler: (error: Error) => void } + | undefined + > { + if (options.maintPushNotifications === "disabled") return; + + const movingEndpointType = await determineEndpoint(tls, host, options); + return { + cmd: [ + "CLIENT", + "MAINT_NOTIFICATIONS", + "ON", + "moving-endpoint-type", + movingEndpointType, + ], + errorHandler: (error: Error) => { + dbgMaintenance("handshake failed:", error); + if (options.maintPushNotifications === "enabled") { + throw error; + } + }, + }; + } + + constructor( + commandsQueue: RedisCommandsQueue, + client: Client, + options: RedisClientOptions, + ) { + this.#commandsQueue = commandsQueue; + this.#options = options; + this.#client = client; + + this.#commandsQueue.addPushHandler(this.#onPush); + } + + #onPush = (push: Array): boolean => { + dbgMaintenance("ONPUSH:", push.map(String)); + switch (push[0].toString()) { + case PN.MOVING: { + // [ 'MOVING', '17', '15', '54.78.247.156:12075' ] + // ^seq ^after ^new ip + const afterSeconds = push[2]; + const url: string | null = push[3] ? 
String(push[3]) : null; + dbgMaintenance("Received MOVING:", afterSeconds, url); + this.#onMoving(afterSeconds, url); + return true; + } + case PN.MIGRATING: + case PN.FAILING_OVER: { + dbgMaintenance("Received MIGRATING|FAILING_OVER"); + this.#onMigrating(); + return true; + } + case PN.MIGRATED: + case PN.FAILED_OVER: { + dbgMaintenance("Received MIGRATED|FAILED_OVER"); + this.#onMigrated(); + return true; + } + } + return false; + }; + + // Queue: + // toWrite [ C D E ] + // waitingForReply [ A B ] - aka In-flight commands + // + // time: ---1-2---3-4-5-6--------------------------- + // + // 1. [EVENT] MOVING PN received + // 2. [ACTION] Pause writing ( we need to wait for new socket to connect and for all in-flight commands to complete ) + // 3. [EVENT] New socket connected + // 4. [EVENT] In-flight commands completed + // 5. [ACTION] Destroy old socket + // 6. [ACTION] Resume writing -> we are going to write to the new socket from now on + #onMoving = async ( + afterSeconds: number, + url: string | null, + ): Promise => { + // 1 [EVENT] MOVING PN received + this.#onMigrating(); + + let host: string; + let port: number; + + // The special value `none` indicates that the `MOVING` message doesn’t need + // to contain an endpoint. Instead it contains the value `null` then. In + // such a corner case, the client is expected to schedule a graceful + // reconnect to its currently configured endpoint after half of the grace + // period that was communicated by the server is over. 
+ if (url === null) { + assert(this.#options.maintMovingEndpointType === "none"); + assert(this.#options.socket !== undefined); + assert("host" in this.#options.socket); + assert(typeof this.#options.socket.host === "string"); + host = this.#options.socket.host; + assert(typeof this.#options.socket.port === "number"); + port = this.#options.socket.port; + const waitTime = (afterSeconds * 1000) / 2; + dbgMaintenance(`Wait for ${waitTime}ms`); + await setTimeout(waitTime); + } else { + const split = url.split(":"); + host = split[0]; + port = Number(split[1]); + } + + // 2 [ACTION] Pause writing + dbgMaintenance("Pausing writing of new commands to old socket"); + this.#client._pause(); + + const tmpClient = this.#client.duplicate({ + maintPushNotifications: "disabled", + socket: { + ...this.#options.socket, + host, + port, + }, + }); + + dbgMaintenance(`Connecting tmp client: ${host}:${port}`); + await tmpClient.connect(); + dbgMaintenance(`Connected to tmp client`); + // 3 [EVENT] New socket connected + + //TODO + // dbgMaintenance( + // `Set timeout for new socket to ${this.#options.maintRelaxedSocketTimeout}`, + // ); + // newSocket.setMaintenanceTimeout(this.#options.maintRelaxedSocketTimeout); + + dbgMaintenance(`Wait for all in-flight commands to complete`); + await this.#commandsQueue.waitForInflightCommandsToComplete(); + dbgMaintenance(`In-flight commands completed`); + // 4 [EVENT] In-flight commands completed + + dbgMaintenance("Swap client sockets..."); + const oldSocket = this.#client._ejectSocket(); + const newSocket = tmpClient._ejectSocket(); + this.#client._insertSocket(newSocket); + tmpClient._insertSocket(oldSocket); + tmpClient.destroy(); + dbgMaintenance("Swap client sockets done."); + // 5 + 6 + dbgMaintenance("Resume writing"); + this.#client._unpause(); + this.#onMigrated(); + }; + + #onMigrating = async () => { + this.#isMaintenance++; + if (this.#isMaintenance > 1) { + dbgMaintenance(`Timeout relaxation already done`); + return; + } + + 
const update: MaintenanceUpdate = { + inMaintenance: true, + relaxedCommandTimeout: this.#options.maintRelaxedCommandTimeout, + relaxedSocketTimeout: this.#options.maintRelaxedSocketTimeout, + }; + + this.#client._maintenanceUpdate(update); + }; + + #onMigrated = async () => { + this.#isMaintenance--; + assert(this.#isMaintenance >= 0); + if (this.#isMaintenance > 0) { + dbgMaintenance(`Not ready to unrelax timeouts yet`); + return; + } + + const update: MaintenanceUpdate = { + inMaintenance : false + }; + + this.#client._maintenanceUpdate(update); + }; +} + +export type MovingEndpointType = + | "auto" + | "internal-ip" + | "internal-fqdn" + | "external-ip" + | "external-fqdn" + | "none"; + +function isPrivateIP(ip: string): boolean { + const version = isIP(ip); + if (version === 4) { + const octets = ip.split(".").map(Number); + return ( + octets[0] === 10 || + (octets[0] === 172 && octets[1] >= 16 && octets[1] <= 31) || + (octets[0] === 192 && octets[1] === 168) + ); + } + if (version === 6) { + return ( + ip.startsWith("fc") || // Unique local + ip.startsWith("fd") || // Unique local + ip === "::1" || // Loopback + ip.startsWith("fe80") // Link-local unicast + ); + } + return false; +} + +async function determineEndpoint( + tlsEnabled: boolean, + host: string, + options: RedisClientOptions, +): Promise { + assert(options.maintMovingEndpointType !== undefined); + if (options.maintMovingEndpointType !== "auto") { + dbgMaintenance( + `Determine endpoint type: ${options.maintMovingEndpointType}`, + ); + return options.maintMovingEndpointType; + } + + const ip = isIP(host) ? host : (await lookup(host, { family: 0 })).address; + + const isPrivate = isPrivateIP(ip); + + let result: MovingEndpointType; + if (tlsEnabled) { + result = isPrivate ? "internal-fqdn" : "external-fqdn"; + } else { + result = isPrivate ? 
"internal-ip" : "external-ip"; + } + + dbgMaintenance(`Determine endpoint type: ${result}`); + return result; +} diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 6c3f81187e..44a16faaeb 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -1,5 +1,5 @@ import COMMANDS from '../commands'; -import RedisSocket, { RedisSocketOptions } from './socket'; +import RedisSocket, { RedisSocketOptions, RedisTcpSocketOptions } from './socket'; import { BasicAuth, CredentialsError, CredentialsProvider, StreamingCredentialsProvider, UnableToObtainNewCredentialsError, Disposable } from '../authx'; import RedisCommandsQueue, { CommandOptions } from './commands-queue'; import { EventEmitter } from 'node:events'; @@ -20,6 +20,7 @@ import { BasicClientSideCache, ClientSideCacheConfig, ClientSideCacheProvider } import { BasicCommandParser, CommandParser } from './parser'; import SingleEntryCache from '../single-entry-cache'; import { version } from '../../package.json' +import EnterpriseMaintenanceManager, { MaintenanceUpdate, MovingEndpointType } from './enterprise-maintenance-manager'; export interface RedisClientOptions< M extends RedisModules = RedisModules, @@ -501,6 +502,11 @@ export default class RedisClient< this.#queue = this.#initiateQueue(); this.#socket = this.#initiateSocket(); + + if(options?.maintPushNotifications !== 'disabled') { + new EnterpriseMaintenanceManager(this.#queue, this, this.#options!); + }; + if (options?.clientSideCache) { if (options.clientSideCache instanceof ClientSideCacheProvider) { this.#clientSideCache = options.clientSideCache; @@ -557,13 +563,15 @@ export default class RedisClient< this._commandOptions = options.commandOptions; } + if(options?.maintPushNotifications !== 'disabled') { + EnterpriseMaintenanceManager.setupDefaultMaintOptions(options!); + } + if (options?.url) { const parsedOptions = RedisClient.parseOptions(options); - if (parsedOptions?.database) { 
this._self.#selectedDB = parsedOptions.database; } - return parsedOptions; } @@ -740,6 +748,12 @@ export default class RedisClient< commands.push({cmd: this.#clientSideCache.trackingOn()}); } + const { tls, host } = this.#options!.socket as RedisTcpSocketOptions; + const maintenanceHandshakeCmd = await EnterpriseMaintenanceManager.getHandshakeCommand(!!tls, host!, this.#options!); + if(maintenanceHandshakeCmd) { + commands.push(maintenanceHandshakeCmd); + }; + return commands; } @@ -939,6 +953,16 @@ export default class RedisClient< this._self.#attachListeners(this._self.#socket); } + /** + * @internal + */ + _maintenanceUpdate(update: MaintenanceUpdate) { + this.#socket.inMaintenance = update.inMaintenance; + this.#socket.setMaintenanceTimeout(update.relaxedSocketTimeout); + this.#queue.inMaintenance = update.inMaintenance; + this.#queue.setMaintenanceCommandTimeout(update.relaxedCommandTimeout); + } + /** * @internal */ diff --git a/packages/client/lib/client/socket.ts b/packages/client/lib/client/socket.ts index 5193e9d8a3..1235b9c00f 100644 --- a/packages/client/lib/client/socket.ts +++ b/packages/client/lib/client/socket.ts @@ -4,6 +4,7 @@ import tls from 'node:tls'; import { ConnectionTimeoutError, ClientClosedError, SocketClosedUnexpectedlyError, ReconnectStrategyError, SocketTimeoutError, SocketTimeoutDuringMaintananceError } from '../errors'; import { setTimeout } from 'node:timers/promises'; import { RedisArgument } from '../RESP/types'; +import { dbgMaintenance } from './enterprise-maintenance-manager'; type NetOptions = { tls?: false; @@ -247,6 +248,7 @@ export default class RedisSocket extends EventEmitter { } setMaintenanceTimeout(ms?: number) { + dbgMaintenance(`Set socket timeout to ${ms}`); if (this.#maintenanceTimeout === ms) return; this.#maintenanceTimeout = ms; From 60f107582ba530266b3cf97e10c397719d6a47bd Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Fri, 15 Aug 2025 12:19:25 +0300 Subject: [PATCH 11/24] chore: various small 
improvements * add _self in client * measure tmp client creation * improve logging * set relaxed timeouts on tmp client --- packages/client/lib/client/commands-queue.ts | 3 ++- .../lib/client/enterprise-maintenance-manager.ts | 12 +++++++++--- packages/client/lib/client/index.ts | 10 +++++----- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index ef0db6ad57..6c7ef38a54 100644 --- a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -87,6 +87,7 @@ export default class RedisCommandsQueue { this.#maintenanceCommandTimeout = ms; let counter = 0; + const total = this.#toWrite.length; // Overwrite timeouts of all eligible toWrite commands for(const node of this.#toWrite.nodes()) { @@ -114,7 +115,7 @@ export default class RedisCommandsQueue { }; signal.addEventListener('abort', command.timeout.listener, { once: true }); }; - dbgMaintenance(`Total of ${counter} timeouts reset to ${ms}`); + dbgMaintenance(`Total of ${counter} of ${total} timeouts reset to ${ms}`); } get isPubSubActive() { diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index 60eaea5057..c941ce2086 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -181,18 +181,24 @@ export default class EnterpriseMaintenanceManager { dbgMaintenance("Pausing writing of new commands to old socket"); this.#client._pause(); + dbgMaintenance("Creating new tmp client"); + let start = performance.now(); const tmpClient = this.#client.duplicate({ - maintPushNotifications: "disabled", socket: { ...this.#options.socket, host, port, }, }); - + dbgMaintenance(`Tmp client created in ${( performance.now() - start ).toFixed(2)}ms`); dbgMaintenance(`Connecting tmp client: ${host}:${port}`); + start = 
performance.now(); + tmpClient._maintenanceUpdate({ + relaxedCommandTimeout: this.#options.maintRelaxedCommandTimeout, + relaxedSocketTimeout: this.#options.maintRelaxedSocketTimeout, + }); await tmpClient.connect(); - dbgMaintenance(`Connected to tmp client`); + dbgMaintenance(`Connected to tmp client in ${(performance.now() - start).toFixed(2)}ms`); // 3 [EVENT] New socket connected //TODO diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 44a16faaeb..30dd41a056 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -937,7 +937,7 @@ export default class RedisClient< _ejectSocket(): RedisSocket { const socket = this._self.#socket; // @ts-ignore - this.#socket = null; + this._self.#socket = null; socket.removeAllListeners(); return socket; } @@ -957,10 +957,10 @@ export default class RedisClient< * @internal */ _maintenanceUpdate(update: MaintenanceUpdate) { - this.#socket.inMaintenance = update.inMaintenance; - this.#socket.setMaintenanceTimeout(update.relaxedSocketTimeout); - this.#queue.inMaintenance = update.inMaintenance; - this.#queue.setMaintenanceCommandTimeout(update.relaxedCommandTimeout); + this._self.#socket.inMaintenance = update.inMaintenance; + this._self.#socket.setMaintenanceTimeout(update.relaxedSocketTimeout); + this._self.#queue.inMaintenance = update.inMaintenance; + this._self.#queue.setMaintenanceCommandTimeout(update.relaxedCommandTimeout); } /** From 207473e93adaef9d3afcea49ce20fa8be885ad3d Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Fri, 15 Aug 2025 15:07:15 +0300 Subject: [PATCH 12/24] refactor(timeouts): remove redundant flag the inMaintenance flag is no longer needed, now that we have default values for maintRelaxedSocketTimeout and maintRelaxedCommandTimeout --- packages/client/lib/client/commands-queue.ts | 31 +++++++++---------- .../client/enterprise-maintenance-manager.ts | 5 ++- packages/client/lib/client/index.ts | 2 -- 
packages/client/lib/client/socket.ts | 15 ++++----- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index 6c7ef38a54..6893a04f14 100644 --- a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -71,21 +71,23 @@ export default class RedisCommandsQueue { #pushHandlers: PushHandler[] = [this.#onPush.bind(this)]; - #inMaintenance = false; - - set inMaintenance(value: boolean) { - this.#inMaintenance = value; - } - #maintenanceCommandTimeout: number | undefined setMaintenanceCommandTimeout(ms: number | undefined) { - dbgMaintenance(`Setting maintenance command timeout to ${ms}`); // Prevent possible api misuse - if (this.#maintenanceCommandTimeout === ms) return; + if (this.#maintenanceCommandTimeout === ms) { + dbgMaintenance(`Queue already set maintenanceCommandTimeout to ${ms}, skipping`); + return; + }; + dbgMaintenance(`Setting maintenance command timeout to ${ms}`); this.#maintenanceCommandTimeout = ms; + if(this.#maintenanceCommandTimeout === undefined) { + dbgMaintenance(`Queue will keep maintenanceCommandTimeout for exisitng commands, just to be on the safe side. New commands will receive normal timeouts`); + return; + } + let counter = 0; const total = this.#toWrite.length; @@ -96,12 +98,8 @@ export default class RedisCommandsQueue { // Remove timeout listener if it exists RedisCommandsQueue.#removeTimeoutListener(command) - // Determine newTimeout - const newTimeout = this.#maintenanceCommandTimeout ?? 
command.timeout?.originalTimeout; - // if no timeout is given and the command didnt have any timeout before, skip - if (!newTimeout) return; - counter++; + const newTimeout = this.#maintenanceCommandTimeout; // Overwrite the command's timeout const signal = AbortSignal.timeout(newTimeout); @@ -109,7 +107,7 @@ export default class RedisCommandsQueue { signal, listener: () => { this.#toWrite.remove(node); - command.reject(this.#inMaintenance ? new CommandTimeoutDuringMaintananceError(newTimeout) : new TimeoutError()); + command.reject(new CommandTimeoutDuringMaintananceError(newTimeout)); }, originalTimeout: command.timeout?.originalTimeout }; @@ -224,7 +222,8 @@ export default class RedisCommandsQueue { // If #maintenanceCommandTimeout was explicitly set, we should // use it instead of the timeout provided by the command - const timeout = this.#maintenanceCommandTimeout || options?.timeout + const timeout = this.#maintenanceCommandTimeout ?? options?.timeout; + const wasInMaintenance = this.#maintenanceCommandTimeout !== undefined; if (timeout) { const signal = AbortSignal.timeout(timeout); @@ -232,7 +231,7 @@ export default class RedisCommandsQueue { signal, listener: () => { this.#toWrite.remove(node); - value.reject(this.#inMaintenance ? new CommandTimeoutDuringMaintananceError(timeout) : new TimeoutError()); + value.reject(wasInMaintenance ? 
new CommandTimeoutDuringMaintananceError(timeout) : new TimeoutError()); }, originalTimeout: options?.timeout }; diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index c941ce2086..449d9505c7 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -27,7 +27,6 @@ export const dbgMaintenance = (...args: any[]) => { }; export interface MaintenanceUpdate { - inMaintenance: boolean; relaxedCommandTimeout?: number; relaxedSocketTimeout?: number; } @@ -233,7 +232,6 @@ export default class EnterpriseMaintenanceManager { } const update: MaintenanceUpdate = { - inMaintenance: true, relaxedCommandTimeout: this.#options.maintRelaxedCommandTimeout, relaxedSocketTimeout: this.#options.maintRelaxedSocketTimeout, }; @@ -250,7 +248,8 @@ export default class EnterpriseMaintenanceManager { } const update: MaintenanceUpdate = { - inMaintenance : false + relaxedCommandTimeout: undefined, + relaxedSocketTimeout: undefined }; this.#client._maintenanceUpdate(update); diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 30dd41a056..74275b20b7 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -957,9 +957,7 @@ export default class RedisClient< * @internal */ _maintenanceUpdate(update: MaintenanceUpdate) { - this._self.#socket.inMaintenance = update.inMaintenance; this._self.#socket.setMaintenanceTimeout(update.relaxedSocketTimeout); - this._self.#queue.inMaintenance = update.inMaintenance; this._self.#queue.setMaintenanceCommandTimeout(update.relaxedCommandTimeout); } diff --git a/packages/client/lib/client/socket.ts b/packages/client/lib/client/socket.ts index 1235b9c00f..9d8ebdae07 100644 --- a/packages/client/lib/client/socket.ts +++ b/packages/client/lib/client/socket.ts @@ -85,12 +85,6 @@ export default class RedisSocket extends 
EventEmitter { return this.#socketEpoch; } - #inMaintenance = false; - - set inMaintenance(value: boolean) { - this.#inMaintenance = value; - } - constructor(initiator: RedisSocketInitiator, options?: RedisSocketOptions) { super(); @@ -249,7 +243,10 @@ export default class RedisSocket extends EventEmitter { setMaintenanceTimeout(ms?: number) { dbgMaintenance(`Set socket timeout to ${ms}`); - if (this.#maintenanceTimeout === ms) return; + if (this.#maintenanceTimeout === ms) { + dbgMaintenance(`Socket already set maintenanceCommandTimeout to ${ms}, skipping`); + return; + }; this.#maintenanceTimeout = ms; @@ -282,8 +279,8 @@ export default class RedisSocket extends EventEmitter { if (this.#socketTimeout) { socket.once('timeout', () => { - const error = this.#inMaintenance - ? new SocketTimeoutDuringMaintananceError(this.#socketTimeout!) + const error = this.#maintenanceTimeout + ? new SocketTimeoutDuringMaintananceError(this.#maintenanceTimeout) : new SocketTimeoutError(this.#socketTimeout!) 
socket.destroy(error); }); From 9e4324810f707d6fe242d88b5fe0715addbcfbc7 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Mon, 18 Aug 2025 11:11:50 +0300 Subject: [PATCH 13/24] fix: try to schedule write upon unpausing --- packages/client/lib/client/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index 74275b20b7..b1a50e3fa0 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -973,6 +973,7 @@ export default class RedisClient< */ _unpause() { this._self.#paused = false; + this._self.#maybeScheduleWrite(); } /** From e9ca77d7e8ce2ff5da4f1fd4e8f847366c156a09 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Fri, 22 Aug 2025 22:08:13 +0300 Subject: [PATCH 14/24] fix: update url for new client if present --- .../client/enterprise-maintenance-manager.ts | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index 449d9505c7..969d560adb 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -182,13 +182,21 @@ export default class EnterpriseMaintenanceManager { dbgMaintenance("Creating new tmp client"); let start = performance.now(); - const tmpClient = this.#client.duplicate({ - socket: { - ...this.#options.socket, - host, - port, - }, - }); + + const tmpOptions = this.#options; + // If the URL is provided, it takes precedence + if(tmpOptions.url) { + const u = new URL(tmpOptions.url); + u.hostname = host; + u.port = String(port); + tmpOptions.url = u.toString(); + } else { + tmpOptions.socket = { + ...tmpOptions.socket, + host, port + } + } + const tmpClient = this.#client.duplicate(tmpOptions); dbgMaintenance(`Tmp client created in ${( performance.now() - start ).toFixed(2)}ms`); 
dbgMaintenance(`Connecting tmp client: ${host}:${port}`); start = performance.now(); From cb2c30c58179b47c2e3a12936da0d440e27df8f0 Mon Sep 17 00:00:00 2001 From: Pavel Pashov Date: Mon, 25 Aug 2025 17:58:19 +0300 Subject: [PATCH 15/24] test: add E2E test infrastructure for Redis maintenance scenarios --- .../client/enterprise-maintenance-manager.ts | 30 ++++ .../test-scenario/fault-injector-client.ts | 152 ++++++++++++++++++ .../test-scenario/push-notification.e2e.ts | 94 +++++++++++ .../tests/test-scenario/test-scenario.util.ts | 110 +++++++++++++ 4 files changed, 386 insertions(+) create mode 100644 packages/client/lib/tests/test-scenario/fault-injector-client.ts create mode 100644 packages/client/lib/tests/test-scenario/push-notification.e2e.ts create mode 100644 packages/client/lib/tests/test-scenario/test-scenario.util.ts diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index 969d560adb..a9482d5335 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -6,6 +6,7 @@ import { lookup } from "dns/promises"; import assert from "node:assert"; import { setTimeout } from "node:timers/promises"; import RedisSocket from "./socket"; +import diagnostics_channel from "node:diagnostics_channel"; export const MAINTENANCE_EVENTS = { PAUSE_WRITING: "pause-writing", @@ -21,11 +22,24 @@ const PN = { FAILED_OVER: "FAILED_OVER", }; +export type DiagnosticsEvent = { + type: string; + timestamp: number; + data?: Object; +}; + export const dbgMaintenance = (...args: any[]) => { if (!process.env.DEBUG_MAINTENANCE) return; return console.log("[MNT]", ...args); }; +export const emitDiagnostics = (event: DiagnosticsEvent) => { + if (!process.env.EMIT_DIAGNOSTICS) return; + + const channel = diagnostics_channel.channel("redis.maintenance"); + channel.publish(event); +}; + export interface MaintenanceUpdate { 
relaxedCommandTimeout?: number; relaxedSocketTimeout?: number; @@ -113,18 +127,34 @@ export default class EnterpriseMaintenanceManager { const afterSeconds = push[2]; const url: string | null = push[3] ? String(push[3]) : null; dbgMaintenance("Received MOVING:", afterSeconds, url); + emitDiagnostics({ + type: PN.MOVING, + timestamp: Date.now(), + data: { + afterSeconds, + url, + }, + }); this.#onMoving(afterSeconds, url); return true; } case PN.MIGRATING: case PN.FAILING_OVER: { dbgMaintenance("Received MIGRATING|FAILING_OVER"); + emitDiagnostics({ + type: PN.MIGRATING, + timestamp: Date.now(), + }); this.#onMigrating(); return true; } case PN.MIGRATED: case PN.FAILED_OVER: { dbgMaintenance("Received MIGRATED|FAILED_OVER"); + emitDiagnostics({ + type: PN.MIGRATED, + timestamp: Date.now(), + }); this.#onMigrated(); return true; } diff --git a/packages/client/lib/tests/test-scenario/fault-injector-client.ts b/packages/client/lib/tests/test-scenario/fault-injector-client.ts new file mode 100644 index 0000000000..7db75e3081 --- /dev/null +++ b/packages/client/lib/tests/test-scenario/fault-injector-client.ts @@ -0,0 +1,152 @@ +import { setTimeout } from "node:timers/promises"; + +export type ActionType = + | "dmc_restart" + | "failover" + | "reshard" + | "sequence_of_actions" + | "network_failure" + | "execute_rlutil_command" + | "execute_rladmin_command" + | "migrate" + | "bind"; + +export interface ActionRequest { + type: ActionType; + parameters?: { + bdb_id?: string; + [key: string]: unknown; + }; +} + +export interface ActionStatus { + status: string; + error: unknown; + output: string; +} + +export class FaultInjectorClient { + private baseUrl: string; + #fetch: typeof fetch; + + constructor(baseUrl: string, fetchImpl: typeof fetch = fetch) { + this.baseUrl = baseUrl.replace(/\/+$/, ""); // trim trailing slash + this.#fetch = fetchImpl; + } + + /** + * Lists all available actions. 
+ * @throws {Error} When the HTTP request fails or response cannot be parsed as JSON + */ + public listActions(): Promise { + return this.#request("GET", "/action"); + } + + /** + * Triggers a specific action. + * @param action The action request to trigger + * @throws {Error} When the HTTP request fails or response cannot be parsed as JSON + */ + public triggerAction(action: ActionRequest): Promise { + return this.#request("POST", "/action", action); + } + + /** + * Gets the status of a specific action. + * @param actionId The ID of the action to check + * @throws {Error} When the HTTP request fails or response cannot be parsed as JSON + */ + public getActionStatus(actionId: string): Promise { + return this.#request("GET", `/action/${actionId}`); + } + + /** + * Executes an rladmin command. + * @param command The rladmin command to execute + * @param bdbId Optional database ID to target + * @throws {Error} When the HTTP request fails or response cannot be parsed as JSON + */ + public executeRladminCommand( + command: string, + bdbId?: string + ): Promise { + const cmd = bdbId ? `rladmin -b ${bdbId} ${command}` : `rladmin ${command}`; + return this.#request("POST", "/rladmin", cmd); + } + + /** + * Waits for an action to complete. 
+ * @param actionId The ID of the action to wait for + * @param options Optional timeout and max wait time + * @throws {Error} When the action does not complete within the max wait time + */ + public async waitForAction( + actionId: string, + { + timeoutMs, + maxWaitTimeMs, + }: { + timeoutMs?: number; + maxWaitTimeMs?: number; + } = {} + ): Promise { + const timeout = timeoutMs || 1000; + const maxWaitTime = maxWaitTimeMs || 60000; + + const startTime = Date.now(); + + while (Date.now() - startTime < maxWaitTime) { + const action = await this.getActionStatus(actionId); + + if (["finished", "failed", "success"].includes(action.status)) { + return action; + } + + await setTimeout(timeout); + } + + throw new Error(`Timeout waiting for action ${actionId}`); + } + + async #request( + method: string, + path: string, + body?: Object | string + ): Promise { + const url = `${this.baseUrl}${path}`; + const headers: Record = { + "Content-Type": "application/json", + }; + + let payload: string | undefined; + + if (body) { + if (typeof body === "string") { + headers["Content-Type"] = "text/plain"; + payload = body; + } else { + headers["Content-Type"] = "application/json"; + payload = JSON.stringify(body); + } + } + + const response = await this.#fetch(url, { method, headers, body: payload }); + + if (!response.ok) { + try { + const text = await response.text(); + throw new Error(`HTTP ${response.status} - ${text}`); + } catch { + throw new Error(`HTTP ${response.status}`); + } + } + + try { + return (await response.json()) as T; + } catch { + throw new Error( + `HTTP ${response.status} - Unable to parse response as JSON` + ); + } + } +} diff --git a/packages/client/lib/tests/test-scenario/push-notification.e2e.ts b/packages/client/lib/tests/test-scenario/push-notification.e2e.ts new file mode 100644 index 0000000000..3408931728 --- /dev/null +++ b/packages/client/lib/tests/test-scenario/push-notification.e2e.ts @@ -0,0 +1,94 @@ +import assert from "node:assert"; +import 
diagnostics_channel from "node:diagnostics_channel"; +import { FaultInjectorClient } from "./fault-injector-client"; +import { + getDatabaseConfig, + getDatabaseConfigFromEnv, + getEnvConfig, + RedisConnectionConfig, +} from "./test-scenario.util"; +import { createClient } from "../../.."; +import { DiagnosticsEvent } from "../../client/enterprise-maintenance-manager"; +import { before } from "mocha"; + +describe("Push Notifications", () => { + const diagnosticsLog: DiagnosticsEvent[] = []; + + const onMessageHandler = (message: unknown) => { + diagnosticsLog.push(message as DiagnosticsEvent); + }; + + let clientConfig: RedisConnectionConfig; + let client: ReturnType>; + let faultInjectorClient: FaultInjectorClient; + + before(() => { + const envConfig = getEnvConfig(); + const redisConfig = getDatabaseConfigFromEnv( + envConfig.redisEndpointsConfigPath + ); + + faultInjectorClient = new FaultInjectorClient(envConfig.faultInjectorUrl); + clientConfig = getDatabaseConfig(redisConfig); + }); + + beforeEach(async () => { + diagnosticsLog.length = 0; + diagnostics_channel.subscribe("redis.maintenance", onMessageHandler); + + client = createClient({ + socket: { + host: clientConfig.host, + port: clientConfig.port, + ...(clientConfig.tls === true ? 
{ tls: true } : {}), + }, + password: clientConfig.password, + username: clientConfig.username, + RESP: 3, + maintPushNotifications: "auto", + maintMovingEndpointType: "external-ip", + maintRelaxedCommandTimeout: 10000, + maintRelaxedSocketTimeout: 10000, + }); + + client.on("error", (err: Error) => { + throw new Error(`Client error: ${err.message}`); + }); + + await client.connect(); + }); + + afterEach(() => { + diagnostics_channel.unsubscribe("redis.maintenance", onMessageHandler); + client.destroy(); + }); + + it("should receive MOVING, MIGRATING, and MIGRATED push notifications", async () => { + const { action_id: migrateActionId } = + await faultInjectorClient.triggerAction<{ action_id: string }>({ + type: "migrate", + parameters: { + cluster_index: "0", + }, + }); + + await faultInjectorClient.waitForAction(migrateActionId); + + const { action_id: bindActionId } = + await faultInjectorClient.triggerAction<{ action_id: string }>({ + type: "bind", + parameters: { + cluster_index: "0", + bdb_id: `${clientConfig.bdbId}`, + }, + }); + + await faultInjectorClient.waitForAction(bindActionId); + + const pushNotificationLogs = diagnosticsLog.filter((log) => { + return ["MOVING", "MIGRATING", "MIGRATED"].includes(log?.type); + }); + + assert.strictEqual(pushNotificationLogs.length, 3); + }); +}); diff --git a/packages/client/lib/tests/test-scenario/test-scenario.util.ts b/packages/client/lib/tests/test-scenario/test-scenario.util.ts new file mode 100644 index 0000000000..82333be8f0 --- /dev/null +++ b/packages/client/lib/tests/test-scenario/test-scenario.util.ts @@ -0,0 +1,110 @@ +import { readFileSync } from "fs"; + +type DatabaseEndpoint = { + addr: string[]; + addr_type: string; + dns_name: string; + oss_cluster_api_preferred_endpoint_type: string; + oss_cluster_api_preferred_ip_type: string; + port: number; + proxy_policy: string; + uid: string; +}; + +type DatabaseConfig = { + bdb_id: number; + username: string; + password: string; + tls: boolean; + 
raw_endpoints: DatabaseEndpoint[];
+  endpoints: string[];
+};
+
+type DatabasesConfig = {
+  [databaseName: string]: DatabaseConfig;
+};
+
+type EnvConfig = {
+  redisEndpointsConfigPath: string;
+  faultInjectorUrl: string;
+};
+
+/**
+ * Reads environment variables required for the test scenario
+ * @returns Environment configuration object
+ * @throws Error if required environment variables are not set
+ */
+export function getEnvConfig(): EnvConfig {
+  if (!process.env.REDIS_ENDPOINTS_CONFIG_PATH) {
+    throw new Error(
+      "REDIS_ENDPOINTS_CONFIG_PATH environment variable must be set"
+    );
+  }
+
+  if (!process.env.FAULT_INJECTION_API_URL) {
+    throw new Error("FAULT_INJECTION_API_URL environment variable must be set");
+  }
+
+  return {
+    redisEndpointsConfigPath: process.env.REDIS_ENDPOINTS_CONFIG_PATH,
+    faultInjectorUrl: process.env.FAULT_INJECTION_API_URL,
+  };
+}
+
+/**
+ * Reads database configuration from a file
+ * @param filePath - The path to the database configuration file
+ * @returns Parsed database configuration object (cast, not schema-validated)
+ * @throws Error if the file cannot be read or its content is not valid JSON;
+ *   the message names the file and includes the underlying failure reason
+ */
+export function getDatabaseConfigFromEnv(filePath: string): DatabasesConfig {
+  try {
+    const fileContent = readFileSync(filePath, "utf8");
+    return JSON.parse(fileContent) as DatabasesConfig;
+  } catch (error) {
+    // Keep the underlying reason so a missing file and malformed JSON
+    // can be told apart from the message alone.
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(
+      `Failed to read or parse database config from ${filePath}: ${reason}`
+    );
+  }
+}
+
+export interface RedisConnectionConfig {
+  host: string;
+  port: number;
+  username: string;
+  password: string;
+  tls: boolean;
+  bdbId: number;
+}
+
+/**
+ * Gets Redis connection parameters for a specific database
+ * @param databasesConfig - The parsed database configuration object
+ * @param databaseName - Optional name of the database to retrieve (defaults to the first one)
+ * @returns Redis connection configuration with host, port, username, password, and tls
+ * @throws Error if the specified database is not found in the configuration
+ */
+export function
getDatabaseConfig( + databasesConfig: DatabasesConfig, + databaseName?: string +): RedisConnectionConfig { + const dbConfig = databaseName + ? databasesConfig[databaseName] + : Object.values(databasesConfig)[0]; + + if (!dbConfig) { + throw new Error( + `Database ${databaseName ? databaseName : ""} not found in configuration` + ); + } + + const endpoint = dbConfig.raw_endpoints[0]; // Use the first endpoint + + return { + host: endpoint.dns_name, + port: endpoint.port, + username: dbConfig.username, + password: dbConfig.password, + tls: dbConfig.tls, + bdbId: dbConfig.bdb_id, + }; +} + From 11c1197966efd1e38dcd97a5133efe2bea24103c Mon Sep 17 00:00:00 2001 From: Pavel Pashov Date: Tue, 26 Aug 2025 13:23:13 +0300 Subject: [PATCH 16/24] refactor: improve enterprise manager push notification handling --- .../client/enterprise-maintenance-manager.ts | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index a9482d5335..98a95ccb1c 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -120,41 +120,39 @@ export default class EnterpriseMaintenanceManager { #onPush = (push: Array): boolean => { dbgMaintenance("ONPUSH:", push.map(String)); - switch (push[0].toString()) { + + if (!Array.isArray(push) || !["MOVING", "MIGRATING", "MIGRATED", "FAILING_OVER", "FAILED_OVER"].includes(String(push[0]))) { + return false; + } + + const type = String(push[0]); + + emitDiagnostics({ + type, + timestamp: Date.now(), + data: { + push: push.map(String), + }, + }); + switch (type) { case PN.MOVING: { // [ 'MOVING', '17', '15', '54.78.247.156:12075' ] // ^seq ^after ^new ip const afterSeconds = push[2]; const url: string | null = push[3] ? 
String(push[3]) : null; dbgMaintenance("Received MOVING:", afterSeconds, url); - emitDiagnostics({ - type: PN.MOVING, - timestamp: Date.now(), - data: { - afterSeconds, - url, - }, - }); this.#onMoving(afterSeconds, url); return true; } case PN.MIGRATING: case PN.FAILING_OVER: { dbgMaintenance("Received MIGRATING|FAILING_OVER"); - emitDiagnostics({ - type: PN.MIGRATING, - timestamp: Date.now(), - }); this.#onMigrating(); return true; } case PN.MIGRATED: case PN.FAILED_OVER: { dbgMaintenance("Received MIGRATED|FAILED_OVER"); - emitDiagnostics({ - type: PN.MIGRATED, - timestamp: Date.now(), - }); this.#onMigrated(); return true; } From 0977fc7d2936c64bb5105cdd513ad8adb5f4b025 Mon Sep 17 00:00:00 2001 From: Pavel Pashov <60297174+PavelPashov@users.noreply.github.com> Date: Fri, 29 Aug 2025 11:01:33 +0300 Subject: [PATCH 17/24] test: add E2E tests for Redis Enterprise maintenance timeout handling (#3) --- .../test-scenario/fault-injector-client.ts | 35 ++++ .../test-scenario/test-command-runner.ts | 104 ++++++++++++ .../timeout-during-notifications.e2e.ts | 155 ++++++++++++++++++ 3 files changed, 294 insertions(+) create mode 100644 packages/client/lib/tests/test-scenario/test-command-runner.ts create mode 100644 packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts diff --git a/packages/client/lib/tests/test-scenario/fault-injector-client.ts b/packages/client/lib/tests/test-scenario/fault-injector-client.ts index 7db75e3081..1f64eea4b3 100644 --- a/packages/client/lib/tests/test-scenario/fault-injector-client.ts +++ b/packages/client/lib/tests/test-scenario/fault-injector-client.ts @@ -108,6 +108,41 @@ export class FaultInjectorClient { throw new Error(`Timeout waiting for action ${actionId}`); } + async migrateAndBindAction({ + bdbId, + clusterIndex, + }: { + bdbId: string | number; + clusterIndex: string | number; + }) { + const bdbIdStr = bdbId.toString(); + const clusterIndexStr = clusterIndex.toString(); + + return this.triggerAction<{ + 
action_id: string; + }>({ + type: "sequence_of_actions", + parameters: { + bdbId: bdbIdStr, + actions: [ + { + type: "migrate", + parameters: { + cluster_index: clusterIndexStr, + }, + }, + { + type: "bind", + parameters: { + cluster_index: clusterIndexStr, + bdb_id: bdbIdStr, + }, + }, + ], + }, + }); + } + async #request( method: string, path: string, diff --git a/packages/client/lib/tests/test-scenario/test-command-runner.ts b/packages/client/lib/tests/test-scenario/test-command-runner.ts new file mode 100644 index 0000000000..d6aeba0902 --- /dev/null +++ b/packages/client/lib/tests/test-scenario/test-command-runner.ts @@ -0,0 +1,104 @@ +import { randomUUID } from "node:crypto"; +import { setTimeout } from "node:timers/promises"; +import { createClient } from "../../.."; + +/** + * Options for the `fireCommandsUntilStopSignal` method + */ +type FireCommandsUntilStopSignalOptions = { + /** + * Number of commands to fire in each batch + */ + batchSize: number; + /** + * Timeout between batches in milliseconds + */ + timeoutMs: number; + /** + * Function that creates the commands to be executed + */ + createCommands: ( + client: ReturnType> + ) => Array<() => Promise>; +}; + +export class TestCommandRunner { + constructor( + private client: ReturnType> + ) {} + + private defaultOptions: FireCommandsUntilStopSignalOptions = { + batchSize: 60, + timeoutMs: 10, + createCommands: ( + client: ReturnType> + ) => [ + () => client.set(randomUUID(), Date.now()), + () => client.get(randomUUID()), + ], + }; + + #toSettled(p: Promise) { + return p + .then((value) => ({ status: "fulfilled" as const, value, error: null })) + .catch((reason) => ({ + status: "rejected" as const, + value: null, + error: reason, + })); + } + + async #racePromises({ + timeout, + stopper, + }: { + timeout: Promise; + stopper: Promise; + }) { + return Promise.race([ + this.#toSettled(timeout).then((result) => ({ + ...result, + stop: false, + })), + this.#toSettled(stopper).then((result) => ({ 
...result, stop: true })), + ]); + } + + /** + * Fires commands until a stop signal is received. + * @param stopSignalPromise Promise that resolves when the command execution should stop + * @param options Options for the command execution + * @returns Promise that resolves when the stop signal is received + */ + async fireCommandsUntilStopSignal( + stopSignalPromise: Promise, + options?: Partial + ) { + const executeOptions = { + ...this.defaultOptions, + ...options, + }; + + const commandPromises = []; + + while (true) { + for (let i = 0; i < executeOptions.batchSize; i++) { + for (const command of executeOptions.createCommands(this.client)) { + commandPromises.push(this.#toSettled(command())); + } + } + + const result = await this.#racePromises({ + timeout: setTimeout(executeOptions.timeoutMs), + stopper: stopSignalPromise, + }); + + if (result.stop) { + return { + commandPromises, + stopResult: result, + }; + } + } + } +} diff --git a/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts b/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts new file mode 100644 index 0000000000..8d8f0455ad --- /dev/null +++ b/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts @@ -0,0 +1,155 @@ +import assert from "node:assert"; +import { setTimeout } from "node:timers/promises"; +import { FaultInjectorClient } from "./fault-injector-client"; +import { + getDatabaseConfig, + getDatabaseConfigFromEnv, + getEnvConfig, + RedisConnectionConfig, +} from "./test-scenario.util"; +import { createClient } from "../../../dist"; +import { before } from "mocha"; +import { TestCommandRunner } from "./test-command-runner"; + +describe("Timeout Handling During Notifications", () => { + let clientConfig: RedisConnectionConfig; + let client: ReturnType>; + let faultInjectorClient: FaultInjectorClient; + let commandRunner: TestCommandRunner; + + before(() => { + const envConfig = getEnvConfig(); + const redisConfig = 
getDatabaseConfigFromEnv( + envConfig.redisEndpointsConfigPath + ); + + faultInjectorClient = new FaultInjectorClient(envConfig.faultInjectorUrl); + clientConfig = getDatabaseConfig(redisConfig); + }); + + beforeEach(async () => { + client = createClient({ + socket: { + host: clientConfig.host, + port: clientConfig.port, + ...(clientConfig.tls === true ? { tls: true } : {}), + }, + password: clientConfig.password, + username: clientConfig.username, + RESP: 3, + maintPushNotifications: "auto", + maintMovingEndpointType: "auto", + }); + + client.on("error", (err: Error) => { + throw new Error(`Client error: ${err.message}`); + }); + + commandRunner = new TestCommandRunner(client); + + await client.connect(); + }); + + afterEach(() => { + client.destroy(); + }); + + it("should relax command timeout on MOVING, MIGRATING, and MIGRATED", async () => { + // PART 1 + // Set very low timeout to trigger errors + client.options!.maintRelaxedCommandTimeout = 50; + + const { action_id: lowTimeoutBindAndMigrateActionId } = + await faultInjectorClient.migrateAndBindAction({ + bdbId: clientConfig.bdbId, + clusterIndex: 0, + }); + + const lowTimeoutWaitPromise = faultInjectorClient.waitForAction( + lowTimeoutBindAndMigrateActionId + ); + + const lowTimeoutCommandPromises = + await commandRunner.fireCommandsUntilStopSignal(lowTimeoutWaitPromise); + + const lowTimeoutRejectedCommands = ( + await Promise.all(lowTimeoutCommandPromises.commandPromises) + ).filter((result) => result.status === "rejected"); + + assert.ok(lowTimeoutRejectedCommands.length > 0); + assert.strictEqual( + lowTimeoutRejectedCommands.filter((rejected) => { + return ( + // TODO instanceof doesn't work for some reason + rejected.error.constructor.name === + "CommandTimeoutDuringMaintananceError" + ); + }).length, + lowTimeoutRejectedCommands.length + ); + + // PART 2 + // Set high timeout to avoid errors + client.options!.maintRelaxedCommandTimeout = 10000; + + const { action_id: highTimeoutBindAndMigrateActionId 
} = + await faultInjectorClient.migrateAndBindAction({ + bdbId: clientConfig.bdbId, + clusterIndex: 0, + }); + + const highTimeoutWaitPromise = faultInjectorClient.waitForAction( + highTimeoutBindAndMigrateActionId + ); + + const highTimeoutCommandPromises = + await commandRunner.fireCommandsUntilStopSignal(highTimeoutWaitPromise); + + const highTimeoutRejectedCommands = ( + await Promise.all(highTimeoutCommandPromises.commandPromises) + ).filter((result) => result.status === "rejected"); + + assert.strictEqual(highTimeoutRejectedCommands.length, 0); + }); + + // TODO this is WIP + it.skip("should unrelax command timeout after MAINTENANCE", async () => { + client.options!.maintRelaxedCommandTimeout = 10000; + client.options!.commandOptions = { + ...client.options!.commandOptions, + timeout: 1, // Set very low timeout to trigger errors + }; + + const { action_id: bindAndMigrateActionId } = + await faultInjectorClient.migrateAndBindAction({ + bdbId: clientConfig.bdbId, + clusterIndex: 0, + }); + + const lowTimeoutWaitPromise = faultInjectorClient.waitForAction( + bindAndMigrateActionId + ); + + const relaxedTimeoutCommandPromises = + await commandRunner.fireCommandsUntilStopSignal(lowTimeoutWaitPromise); + + const relaxedTimeoutRejectedCommands = ( + await Promise.all(relaxedTimeoutCommandPromises.commandPromises) + ).filter((result) => result.status === "rejected"); + console.log( + "relaxedTimeoutRejectedCommands", + relaxedTimeoutRejectedCommands + ); + + assert.ok(relaxedTimeoutRejectedCommands.length === 0); + + const unrelaxedCommandPromises = + await commandRunner.fireCommandsUntilStopSignal(setTimeout(1 * 1000)); + + const unrelaxedRejectedCommands = ( + await Promise.all(unrelaxedCommandPromises.commandPromises) + ).filter((result) => result.status === "rejected"); + + assert.ok(unrelaxedRejectedCommands.length > 0); + }); +}); From fe1499c113c704ecea203e827cd2f0abc9bb630d Mon Sep 17 00:00:00 2001 From: Pavel Pashov 
<60297174+PavelPashov@users.noreply.github.com> Date: Fri, 29 Aug 2025 11:36:18 +0300 Subject: [PATCH 18/24] test: small fixes e2e tests (#5) --- .../client/lib/tests/test-scenario/fault-injector-client.ts | 4 ++-- .../tests/test-scenario/timeout-during-notifications.e2e.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/client/lib/tests/test-scenario/fault-injector-client.ts b/packages/client/lib/tests/test-scenario/fault-injector-client.ts index 1f64eea4b3..d6635ac42e 100644 --- a/packages/client/lib/tests/test-scenario/fault-injector-client.ts +++ b/packages/client/lib/tests/test-scenario/fault-injector-client.ts @@ -127,13 +127,13 @@ export class FaultInjectorClient { actions: [ { type: "migrate", - parameters: { + params: { cluster_index: clusterIndexStr, }, }, { type: "bind", - parameters: { + params: { cluster_index: clusterIndexStr, bdb_id: bdbIdStr, }, diff --git a/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts b/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts index 8d8f0455ad..fbb63aa9fc 100644 --- a/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts +++ b/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts @@ -7,7 +7,7 @@ import { getEnvConfig, RedisConnectionConfig, } from "./test-scenario.util"; -import { createClient } from "../../../dist"; +import { createClient } from "../../.."; import { before } from "mocha"; import { TestCommandRunner } from "./test-command-runner"; From 0d7701d4b820f0383b1724e665416aed02216400 Mon Sep 17 00:00:00 2001 From: Pavel Pashov <60297174+PavelPashov@users.noreply.github.com> Date: Mon, 1 Sep 2025 14:27:23 +0300 Subject: [PATCH 19/24] =?UTF-8?q?refactor(test):=20improve=20e2e=20test=20?= =?UTF-8?q?infrastructure=20for=20maintenance=20scena=E2=80=A6=20(#6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor(test): improve e2e test 
infrastructure for maintenance scenarios * refactor(test): improve e2e test infrastructure for maintenance scenarios --- .../test-scenario/test-command-runner.ts | 40 ++++---- .../tests/test-scenario/test-scenario.util.ts | 87 ++++++++++++++++ .../timeout-during-notifications.e2e.ts | 98 ++++++++++--------- 3 files changed, 160 insertions(+), 65 deletions(-) diff --git a/packages/client/lib/tests/test-scenario/test-command-runner.ts b/packages/client/lib/tests/test-scenario/test-command-runner.ts index d6aeba0902..9e1acc3a8a 100644 --- a/packages/client/lib/tests/test-scenario/test-command-runner.ts +++ b/packages/client/lib/tests/test-scenario/test-command-runner.ts @@ -22,12 +22,11 @@ type FireCommandsUntilStopSignalOptions = { ) => Array<() => Promise>; }; +/** + * Utility class for running test commands until a stop signal is received + */ export class TestCommandRunner { - constructor( - private client: ReturnType> - ) {} - - private defaultOptions: FireCommandsUntilStopSignalOptions = { + private static readonly defaultOptions: FireCommandsUntilStopSignalOptions = { batchSize: 60, timeoutMs: 10, createCommands: ( @@ -38,7 +37,7 @@ export class TestCommandRunner { ], }; - #toSettled(p: Promise) { + static #toSettled(p: Promise) { return p .then((value) => ({ status: "fulfilled" as const, value, error: null })) .catch((reason) => ({ @@ -48,7 +47,7 @@ export class TestCommandRunner { })); } - async #racePromises({ + static async #racePromises({ timeout, stopper, }: { @@ -56,26 +55,31 @@ export class TestCommandRunner { stopper: Promise; }) { return Promise.race([ - this.#toSettled(timeout).then((result) => ({ + TestCommandRunner.#toSettled(timeout).then((result) => ({ ...result, stop: false, })), - this.#toSettled(stopper).then((result) => ({ ...result, stop: true })), + TestCommandRunner.#toSettled(stopper).then((result) => ({ + ...result, + stop: true, + })), ]); } /** - * Fires commands until a stop signal is received. 
- * @param stopSignalPromise Promise that resolves when the command execution should stop - * @param options Options for the command execution - * @returns Promise that resolves when the stop signal is received + * Fires a batch of test commands until a stop signal is received + * @param client - The Redis client to use + * @param stopSignalPromise - Promise that resolves when the execution should stop + * @param options - Options for the command execution + * @returns An object containing the promises of all executed commands and the result of the stop signal */ - async fireCommandsUntilStopSignal( + static async fireCommandsUntilStopSignal( + client: ReturnType>, stopSignalPromise: Promise, options?: Partial ) { const executeOptions = { - ...this.defaultOptions, + ...TestCommandRunner.defaultOptions, ...options, }; @@ -83,12 +87,12 @@ export class TestCommandRunner { while (true) { for (let i = 0; i < executeOptions.batchSize; i++) { - for (const command of executeOptions.createCommands(this.client)) { - commandPromises.push(this.#toSettled(command())); + for (const command of executeOptions.createCommands(client)) { + commandPromises.push(TestCommandRunner.#toSettled(command())); } } - const result = await this.#racePromises({ + const result = await TestCommandRunner.#racePromises({ timeout: setTimeout(executeOptions.timeoutMs), stopper: stopSignalPromise, }); diff --git a/packages/client/lib/tests/test-scenario/test-scenario.util.ts b/packages/client/lib/tests/test-scenario/test-scenario.util.ts index 82333be8f0..b130cdc538 100644 --- a/packages/client/lib/tests/test-scenario/test-scenario.util.ts +++ b/packages/client/lib/tests/test-scenario/test-scenario.util.ts @@ -1,4 +1,6 @@ import { readFileSync } from "fs"; +import { createClient, RedisClientOptions } from "../../.."; +import { stub } from "sinon"; type DatabaseEndpoint = { addr: string[]; @@ -108,3 +110,88 @@ export function getDatabaseConfig( }; } +// TODO this should be moved in the tests utils 
package +export async function blockSetImmediate(fn: () => Promise) { + let setImmediateStub: any; + + try { + setImmediateStub = stub(global, "setImmediate"); + setImmediateStub.callsFake(() => { + //Dont call the callback, effectively blocking execution + }); + await fn(); + } finally { + if (setImmediateStub) { + setImmediateStub.restore(); + } + } +} + +/** + * Factory class for creating and managing Redis clients + */ +export class ClientFactory { + private readonly clients = new Map< + string, + ReturnType> + >(); + + constructor(private readonly config: RedisConnectionConfig) {} + + /** + * Creates a new client with the specified options and connects it to the database + * @param key - The key to store the client under + * @param options - Optional client options + * @returns The created and connected client + */ + async create(key: string, options: Partial = {}) { + const client = createClient({ + socket: { + host: this.config.host, + port: this.config.port, + ...(this.config.tls === true ? 
{ tls: true } : {}), + }, + password: this.config.password, + username: this.config.username, + RESP: 3, + maintPushNotifications: "auto", + maintMovingEndpointType: "auto", + ...options, + }); + + client.on("error", (err: Error) => { + throw new Error(`Client error: ${err.message}`); + }); + + await client.connect(); + + this.clients.set(key, client); + + return client; + } + + /** + * Gets an existing client by key or the first one if no key is provided + * @param key - The key of the client to retrieve + * @returns The client if found, undefined otherwise + */ + get(key?: string) { + if (key) { + return this.clients.get(key); + } + + // Get the first one if no key is provided + return this.clients.values().next().value; + } + + /** + * Destroys all created clients + */ + destroyAll() { + this.clients.forEach((client) => { + if (client && client.isOpen) { + client.destroy(); + } + }); + } +} diff --git a/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts b/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts index fbb63aa9fc..7bdf23fcb1 100644 --- a/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts +++ b/packages/client/lib/tests/test-scenario/timeout-during-notifications.e2e.ts @@ -1,11 +1,13 @@ import assert from "node:assert"; -import { setTimeout } from "node:timers/promises"; + import { FaultInjectorClient } from "./fault-injector-client"; import { + ClientFactory, getDatabaseConfig, getDatabaseConfigFromEnv, getEnvConfig, RedisConnectionConfig, + blockSetImmediate } from "./test-scenario.util"; import { createClient } from "../../.."; import { before } from "mocha"; @@ -13,9 +15,9 @@ import { TestCommandRunner } from "./test-command-runner"; describe("Timeout Handling During Notifications", () => { let clientConfig: RedisConnectionConfig; - let client: ReturnType>; + let clientFactory: ClientFactory; let faultInjectorClient: FaultInjectorClient; - let commandRunner: TestCommandRunner; 
+ let defaultClient: ReturnType>; before(() => { const envConfig = getEnvConfig(); @@ -23,41 +25,27 @@ describe("Timeout Handling During Notifications", () => { envConfig.redisEndpointsConfigPath ); - faultInjectorClient = new FaultInjectorClient(envConfig.faultInjectorUrl); clientConfig = getDatabaseConfig(redisConfig); + faultInjectorClient = new FaultInjectorClient(envConfig.faultInjectorUrl); + clientFactory = new ClientFactory(clientConfig); }); beforeEach(async () => { - client = createClient({ - socket: { - host: clientConfig.host, - port: clientConfig.port, - ...(clientConfig.tls === true ? { tls: true } : {}), - }, - password: clientConfig.password, - username: clientConfig.username, - RESP: 3, - maintPushNotifications: "auto", - maintMovingEndpointType: "auto", - }); - - client.on("error", (err: Error) => { - throw new Error(`Client error: ${err.message}`); - }); - - commandRunner = new TestCommandRunner(client); + defaultClient = await clientFactory.create("default"); - await client.connect(); + await defaultClient.flushAll(); }); - afterEach(() => { - client.destroy(); + afterEach(async () => { + clientFactory.destroyAll(); }); it("should relax command timeout on MOVING, MIGRATING, and MIGRATED", async () => { // PART 1 // Set very low timeout to trigger errors - client.options!.maintRelaxedCommandTimeout = 50; + const lowTimeoutClient = await clientFactory.create("lowTimeout", { + maintRelaxedCommandTimeout: 50, + }); const { action_id: lowTimeoutBindAndMigrateActionId } = await faultInjectorClient.migrateAndBindAction({ @@ -70,7 +58,10 @@ describe("Timeout Handling During Notifications", () => { ); const lowTimeoutCommandPromises = - await commandRunner.fireCommandsUntilStopSignal(lowTimeoutWaitPromise); + await TestCommandRunner.fireCommandsUntilStopSignal( + lowTimeoutClient, + lowTimeoutWaitPromise + ); const lowTimeoutRejectedCommands = ( await Promise.all(lowTimeoutCommandPromises.commandPromises) @@ -90,7 +81,9 @@ describe("Timeout Handling 
During Notifications", () => { // PART 2 // Set high timeout to avoid errors - client.options!.maintRelaxedCommandTimeout = 10000; + const highTimeoutClient = await clientFactory.create("highTimeout", { + maintRelaxedCommandTimeout: 10000, + }); const { action_id: highTimeoutBindAndMigrateActionId } = await faultInjectorClient.migrateAndBindAction({ @@ -103,7 +96,10 @@ describe("Timeout Handling During Notifications", () => { ); const highTimeoutCommandPromises = - await commandRunner.fireCommandsUntilStopSignal(highTimeoutWaitPromise); + await TestCommandRunner.fireCommandsUntilStopSignal( + highTimeoutClient, + highTimeoutWaitPromise + ); const highTimeoutRejectedCommands = ( await Promise.all(highTimeoutCommandPromises.commandPromises) @@ -112,13 +108,15 @@ describe("Timeout Handling During Notifications", () => { assert.strictEqual(highTimeoutRejectedCommands.length, 0); }); - // TODO this is WIP - it.skip("should unrelax command timeout after MAINTENANCE", async () => { - client.options!.maintRelaxedCommandTimeout = 10000; - client.options!.commandOptions = { - ...client.options!.commandOptions, - timeout: 1, // Set very low timeout to trigger errors - }; + it("should unrelax command timeout after MAINTENANCE", async () => { + const clientWithCommandTimeout = await clientFactory.create( + "clientWithCommandTimeout", + { + commandOptions: { + timeout: 100, + }, + } + ); const { action_id: bindAndMigrateActionId } = await faultInjectorClient.migrateAndBindAction({ @@ -131,25 +129,31 @@ describe("Timeout Handling During Notifications", () => { ); const relaxedTimeoutCommandPromises = - await commandRunner.fireCommandsUntilStopSignal(lowTimeoutWaitPromise); + await TestCommandRunner.fireCommandsUntilStopSignal( + clientWithCommandTimeout, + lowTimeoutWaitPromise + ); const relaxedTimeoutRejectedCommands = ( await Promise.all(relaxedTimeoutCommandPromises.commandPromises) ).filter((result) => result.status === "rejected"); - console.log( - 
"relaxedTimeoutRejectedCommands", - relaxedTimeoutRejectedCommands - ); assert.ok(relaxedTimeoutRejectedCommands.length === 0); - const unrelaxedCommandPromises = - await commandRunner.fireCommandsUntilStopSignal(setTimeout(1 * 1000)); + const start = performance.now(); - const unrelaxedRejectedCommands = ( - await Promise.all(unrelaxedCommandPromises.commandPromises) - ).filter((result) => result.status === "rejected"); + let error: any; + await blockSetImmediate(async () => { + try { + await clientWithCommandTimeout.set("key", "value"); + } catch (err: any) { + error = err; + } + }); - assert.ok(unrelaxedRejectedCommands.length > 0); + // Make sure it took less than 1sec to fail + assert.ok(performance.now() - start < 1000); + assert.ok(error instanceof Error); + assert.ok(error.constructor.name === "TimeoutError"); }); }); From 4bb0b068c7b687864e2f3169c97a9aaf8753b6d3 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Mon, 1 Sep 2025 15:26:26 +0300 Subject: [PATCH 20/24] test: add connection handoff test --- .../test-scenario/connection-handoff.e2e.ts | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 packages/client/lib/tests/test-scenario/connection-handoff.e2e.ts diff --git a/packages/client/lib/tests/test-scenario/connection-handoff.e2e.ts b/packages/client/lib/tests/test-scenario/connection-handoff.e2e.ts new file mode 100644 index 0000000000..c9207d1d5e --- /dev/null +++ b/packages/client/lib/tests/test-scenario/connection-handoff.e2e.ts @@ -0,0 +1,126 @@ +import diagnostics_channel from "node:diagnostics_channel"; +import { FaultInjectorClient } from "./fault-injector-client"; +import { + getDatabaseConfig, + getDatabaseConfigFromEnv, + getEnvConfig, + RedisConnectionConfig, +} from "./test-scenario.util"; +import { createClient } from "../../.."; +import { DiagnosticsEvent } from "../../client/enterprise-maintenance-manager"; +import { before } from "mocha"; +import { spy } from "sinon"; +import assert from "node:assert"; 
+import { TestCommandRunner } from "./test-command-runner"; +import net from "node:net"; + +describe("Connection Handoff", () => { + const diagnosticsLog: DiagnosticsEvent[] = []; + + const onMessageHandler = (message: unknown) => { + diagnosticsLog.push(message as DiagnosticsEvent); + }; + + let clientConfig: RedisConnectionConfig; + let client: ReturnType>; + let faultInjectorClient: FaultInjectorClient; + let connectSpy = spy(net, "createConnection"); + + before(() => { + const envConfig = getEnvConfig(); + const redisConfig = getDatabaseConfigFromEnv( + envConfig.redisEndpointsConfigPath, + ); + + faultInjectorClient = new FaultInjectorClient(envConfig.faultInjectorUrl); + clientConfig = getDatabaseConfig(redisConfig); + }); + + beforeEach(async () => { + diagnosticsLog.length = 0; + diagnostics_channel.subscribe("redis.maintenance", onMessageHandler); + + connectSpy.resetHistory(); + + client = createClient({ + socket: { + host: clientConfig.host, + port: clientConfig.port, + ...(clientConfig.tls === true ? 
{ tls: true } : {}), + }, + password: clientConfig.password, + username: clientConfig.username, + RESP: 3, + maintPushNotifications: "auto", + maintMovingEndpointType: "external-ip", + maintRelaxedCommandTimeout: 10000, + maintRelaxedSocketTimeout: 10000, + }); + + client.on("error", (err: Error) => { + throw new Error(`Client error: ${err.message}`); + }); + + await client.connect(); + await client.flushAll(); + }); + + afterEach(() => { + diagnostics_channel.unsubscribe("redis.maintenance", onMessageHandler); + client.destroy(); + }); + + describe("New Connection Establishment", () => { + it("should establish new connection", async () => { + assert.equal(connectSpy.callCount, 1); + + const { action_id: lowTimeoutBindAndMigrateActionId } = + await faultInjectorClient.migrateAndBindAction({ + bdbId: clientConfig.bdbId, + clusterIndex: 0, + }); + + const lowTimeoutWaitPromise = faultInjectorClient.waitForAction( + lowTimeoutBindAndMigrateActionId, + ); + + await lowTimeoutWaitPromise; + assert.equal(connectSpy.callCount, 2); + }); + }); + + describe("TLS Connection Handoff", () => { + it("TODO receiveMessagesWithTLSEnabledTest", async () => { + // + }); + it("TODO connectionHandoffWithStaticInternalNameTest", async () => { + // + }); + it("TODO connectionHandoffWithStaticExternalNameTest", async () => { + // + }); + }); + + describe("Traffic Resumption", () => { + it("Traffic resumed after handoff", async () => { + const { action_id } = await faultInjectorClient.migrateAndBindAction({ + bdbId: clientConfig.bdbId, + clusterIndex: 0, + }); + + const workloadPromise = faultInjectorClient.waitForAction(action_id); + + const commandPromises = + await TestCommandRunner.fireCommandsUntilStopSignal( + client, + workloadPromise, + ); + + const rejected = ( + await Promise.all(commandPromises.commandPromises) + ).filter((result) => result.status === "rejected"); + + assert.ok(rejected.length === 0); + }); + }); +}); From 7a4cd50ccbf546147a9462e3c9516ad12efd7d87 Mon Sep 17 
00:00:00 2001 From: Nikolay Karadzhov Date: Tue, 2 Sep 2025 13:00:07 +0300 Subject: [PATCH 21/24] add REDIS_ prefix to env variables --- .../client/lib/client/enterprise-maintenance-manager.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index 98a95ccb1c..a4f5fabda8 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -29,12 +29,12 @@ export type DiagnosticsEvent = { }; export const dbgMaintenance = (...args: any[]) => { - if (!process.env.DEBUG_MAINTENANCE) return; + if (!process.env.REDIS_DEBUG_MAINTENANCE) return; return console.log("[MNT]", ...args); }; export const emitDiagnostics = (event: DiagnosticsEvent) => { - if (!process.env.EMIT_DIAGNOSTICS) return; + if (!process.env.REDIS_EMIT_DIAGNOSTICS) return; const channel = diagnostics_channel.channel("redis.maintenance"); channel.publish(event); @@ -120,7 +120,7 @@ export default class EnterpriseMaintenanceManager { #onPush = (push: Array): boolean => { dbgMaintenance("ONPUSH:", push.map(String)); - + if (!Array.isArray(push) || !["MOVING", "MIGRATING", "MIGRATED", "FAILING_OVER", "FAILED_OVER"].includes(String(push[0]))) { return false; } From c7188bc10542d557b342d38e996f6f62fc666b3b Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Tue, 2 Sep 2025 14:42:15 +0300 Subject: [PATCH 22/24] address PR comments --- packages/client/lib/client/commands-queue.ts | 6 ++--- .../client/enterprise-maintenance-manager.ts | 24 +++++++++---------- packages/client/lib/client/index.ts | 6 ++--- packages/client/lib/client/socket.ts | 4 ++-- packages/client/lib/errors.ts | 4 ++-- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/packages/client/lib/client/commands-queue.ts b/packages/client/lib/client/commands-queue.ts index 6893a04f14..ae67ca28cd 100644 --- 
a/packages/client/lib/client/commands-queue.ts +++ b/packages/client/lib/client/commands-queue.ts @@ -3,7 +3,7 @@ import encodeCommand from '../RESP/encoder'; import { Decoder, PUSH_TYPE_MAPPING, RESP_TYPES } from '../RESP/decoder'; import { TypeMapping, ReplyUnion, RespVersions, RedisArgument } from '../RESP/types'; import { ChannelListeners, PubSub, PubSubCommand, PubSubListener, PubSubType, PubSubTypeListeners } from './pub-sub'; -import { AbortError, ErrorReply, CommandTimeoutDuringMaintananceError, TimeoutError } from '../errors'; +import { AbortError, ErrorReply, CommandTimeoutDuringMaintenanceError, TimeoutError } from '../errors'; import { MonitorCallback } from '.'; import { dbgMaintenance } from './enterprise-maintenance-manager'; @@ -107,7 +107,7 @@ export default class RedisCommandsQueue { signal, listener: () => { this.#toWrite.remove(node); - command.reject(new CommandTimeoutDuringMaintananceError(newTimeout)); + command.reject(new CommandTimeoutDuringMaintenanceError(newTimeout)); }, originalTimeout: command.timeout?.originalTimeout }; @@ -231,7 +231,7 @@ export default class RedisCommandsQueue { signal, listener: () => { this.#toWrite.remove(node); - value.reject(wasInMaintenance ? new CommandTimeoutDuringMaintananceError(timeout) : new TimeoutError()); + value.reject(wasInMaintenance ? 
new CommandTimeoutDuringMaintenanceError(timeout) : new TimeoutError()); }, originalTimeout: options?.timeout }; diff --git a/packages/client/lib/client/enterprise-maintenance-manager.ts b/packages/client/lib/client/enterprise-maintenance-manager.ts index a4f5fabda8..d4766d9e53 100644 --- a/packages/client/lib/client/enterprise-maintenance-manager.ts +++ b/packages/client/lib/client/enterprise-maintenance-manager.ts @@ -221,27 +221,25 @@ export default class EnterpriseMaintenanceManager { } else { tmpOptions.socket = { ...tmpOptions.socket, - host, port + host, + port } } const tmpClient = this.#client.duplicate(tmpOptions); dbgMaintenance(`Tmp client created in ${( performance.now() - start ).toFixed(2)}ms`); - dbgMaintenance(`Connecting tmp client: ${host}:${port}`); - start = performance.now(); + dbgMaintenance( + `Set timeout for tmp client to ${this.#options.maintRelaxedSocketTimeout}`, + ); tmpClient._maintenanceUpdate({ relaxedCommandTimeout: this.#options.maintRelaxedCommandTimeout, relaxedSocketTimeout: this.#options.maintRelaxedSocketTimeout, }); + dbgMaintenance(`Connecting tmp client: ${host}:${port}`); + start = performance.now(); await tmpClient.connect(); dbgMaintenance(`Connected to tmp client in ${(performance.now() - start).toFixed(2)}ms`); // 3 [EVENT] New socket connected - //TODO - // dbgMaintenance( - // `Set timeout for new socket to ${this.#options.maintRelaxedSocketTimeout}`, - // ); - // newSocket.setMaintenanceTimeout(this.#options.maintRelaxedSocketTimeout); - dbgMaintenance(`Wait for all in-flight commands to complete`); await this.#commandsQueue.waitForInflightCommandsToComplete(); dbgMaintenance(`In-flight commands completed`); @@ -260,7 +258,7 @@ export default class EnterpriseMaintenanceManager { this.#onMigrated(); }; - #onMigrating = async () => { + #onMigrating = () => { this.#isMaintenance++; if (this.#isMaintenance > 1) { dbgMaintenance(`Timeout relaxation already done`); @@ -275,9 +273,9 @@ export default class 
EnterpriseMaintenanceManager { this.#client._maintenanceUpdate(update); }; - #onMigrated = async () => { - this.#isMaintenance--; - assert(this.#isMaintenance >= 0); + #onMigrated = () => { + //ensure that #isMaintenance doesnt go under 0 + this.#isMaintenance = Math.max(this.#isMaintenance - 1, 0); if (this.#isMaintenance > 0) { dbgMaintenance(`Not ready to unrelax timeouts yet`); return; diff --git a/packages/client/lib/client/index.ts b/packages/client/lib/client/index.ts index b1a50e3fa0..cf5763357a 100644 --- a/packages/client/lib/client/index.ts +++ b/packages/client/lib/client/index.ts @@ -171,7 +171,7 @@ export interface RedisClientOptions< /** * Specifies a more relaxed timeout (in milliseconds) for commands during a maintenance window. * This helps minimize command timeouts during maintenance. If not provided, the `commandOptions.timeout` - * will be used instead. Timeouts during maintenance period result in a `CommandTimeoutDuringMaintanance` error. + * will be used instead. Timeouts during maintenance period result in a `CommandTimeoutDuringMaintenance` error. * * The default is 10000 */ @@ -179,7 +179,7 @@ export interface RedisClientOptions< /** * Specifies a more relaxed timeout (in milliseconds) for the socket during a maintenance window. * This helps minimize socket timeouts during maintenance. If not provided, the `socket.timeout` - * will be used instead. Timeouts during maintenance period result in a `SocketTimeoutDuringMaintanance` error. + * will be used instead. Timeouts during maintenance period result in a `SocketTimeoutDuringMaintenance` error. 
* * The default is 10000 */ @@ -943,7 +943,7 @@ export default class RedisClient< } /** - * @intenal + * @internal */ _insertSocket(socket: RedisSocket) { if(this._self.#socket) { diff --git a/packages/client/lib/client/socket.ts b/packages/client/lib/client/socket.ts index 9d8ebdae07..c5569e8654 100644 --- a/packages/client/lib/client/socket.ts +++ b/packages/client/lib/client/socket.ts @@ -1,7 +1,7 @@ import { EventEmitter, once } from 'node:events'; import net from 'node:net'; import tls from 'node:tls'; -import { ConnectionTimeoutError, ClientClosedError, SocketClosedUnexpectedlyError, ReconnectStrategyError, SocketTimeoutError, SocketTimeoutDuringMaintananceError } from '../errors'; +import { ConnectionTimeoutError, ClientClosedError, SocketClosedUnexpectedlyError, ReconnectStrategyError, SocketTimeoutError, SocketTimeoutDuringMaintenanceError } from '../errors'; import { setTimeout } from 'node:timers/promises'; import { RedisArgument } from '../RESP/types'; import { dbgMaintenance } from './enterprise-maintenance-manager'; @@ -280,7 +280,7 @@ export default class RedisSocket extends EventEmitter { if (this.#socketTimeout) { socket.once('timeout', () => { const error = this.#maintenanceTimeout - ? new SocketTimeoutDuringMaintananceError(this.#maintenanceTimeout) + ? new SocketTimeoutDuringMaintenanceError(this.#maintenanceTimeout) : new SocketTimeoutError(this.#socketTimeout!) socket.destroy(error); }); diff --git a/packages/client/lib/errors.ts b/packages/client/lib/errors.ts index ae4d598abd..4d9ddf7f2b 100644 --- a/packages/client/lib/errors.ts +++ b/packages/client/lib/errors.ts @@ -71,13 +71,13 @@ export class BlobError extends ErrorReply {} export class TimeoutError extends Error {} -export class SocketTimeoutDuringMaintananceError extends TimeoutError { +export class SocketTimeoutDuringMaintenanceError extends TimeoutError { constructor(timeout: number) { super(`Socket timeout during maintenance. 
Expecting data, but didn't receive any in ${timeout}ms.`); } } -export class CommandTimeoutDuringMaintananceError extends TimeoutError { +export class CommandTimeoutDuringMaintenanceError extends TimeoutError { constructor(timeout: number) { super(`Command timeout during maintenance. Waited to write command for more than ${timeout}ms.`); } From d91a0b8eea052cf69774f7ac286dae64b3b66ec4 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Tue, 9 Sep 2025 15:33:40 +0300 Subject: [PATCH 23/24] chore: update peerdeps to include beta ranges --- packages/bloom/package.json | 2 +- packages/entraid/package.json | 2 +- packages/json/package.json | 2 +- packages/redis/package.json | 10 +++++----- packages/search/package.json | 2 +- packages/time-series/package.json | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/bloom/package.json b/packages/bloom/package.json index e2ff5a8b42..4f46bce4ad 100644 --- a/packages/bloom/package.json +++ b/packages/bloom/package.json @@ -13,7 +13,7 @@ "release": "release-it" }, "peerDependencies": { - "@redis/client": "^5.8.2" + "@redis/client": "^5.8.2 || ^5.9.0-0" }, "devDependencies": { "@redis/test-utils": "*" diff --git a/packages/entraid/package.json b/packages/entraid/package.json index 9991fa3fb8..272747d1a8 100644 --- a/packages/entraid/package.json +++ b/packages/entraid/package.json @@ -22,7 +22,7 @@ "@azure/msal-node": "^2.16.1" }, "peerDependencies": { - "@redis/client": "^5.8.2" + "@redis/client": "^5.8.2 || ^5.9.0-0" }, "devDependencies": { "@types/express": "^4.17.21", diff --git a/packages/json/package.json b/packages/json/package.json index ff689dd17e..a1db4e44b8 100644 --- a/packages/json/package.json +++ b/packages/json/package.json @@ -13,7 +13,7 @@ "release": "release-it" }, "peerDependencies": { - "@redis/client": "^5.8.2" + "@redis/client": "^5.8.2 || ^5.9.0-0" }, "devDependencies": { "@redis/test-utils": "*" diff --git a/packages/redis/package.json b/packages/redis/package.json index 
583a660681..ed2715d06f 100644 --- a/packages/redis/package.json +++ b/packages/redis/package.json @@ -13,11 +13,11 @@ "release": "release-it" }, "dependencies": { - "@redis/bloom": "5.8.2", - "@redis/client": "5.8.2", - "@redis/json": "5.8.2", - "@redis/search": "5.8.2", - "@redis/time-series": "5.8.2" + "@redis/bloom": "^5.8.2 || ^5.9.0-0", + "@redis/client": "^5.8.2 || ^5.9.0-0", + "@redis/json": "^5.8.2 || ^5.9.0-0", + "@redis/search": "^5.8.2 || ^5.9.0-0", + "@redis/time-series": "^5.8.2 || ^5.9.0-0" }, "engines": { "node": ">= 18" diff --git a/packages/search/package.json b/packages/search/package.json index 40238080e8..20fe27aad6 100644 --- a/packages/search/package.json +++ b/packages/search/package.json @@ -14,7 +14,7 @@ "release": "release-it" }, "peerDependencies": { - "@redis/client": "^5.8.2" + "@redis/client": "^5.8.2 || ^5.9.0-0" }, "devDependencies": { "@redis/test-utils": "*" diff --git a/packages/time-series/package.json b/packages/time-series/package.json index 46ea5b16fe..0e13ac1a11 100644 --- a/packages/time-series/package.json +++ b/packages/time-series/package.json @@ -13,7 +13,7 @@ "release": "release-it" }, "peerDependencies": { - "@redis/client": "^5.8.2" + "@redis/client": "^5.8.2 || ^5.9.0-0" }, "devDependencies": { "@redis/test-utils": "*" From 74da65765f422649c42927a145e4577fd937aae6 Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Tue, 9 Sep 2025 15:38:14 +0300 Subject: [PATCH 24/24] chore: remove nonexistent package from lock --- package-lock.json | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/package-lock.json b/package-lock.json index 736abe70a4..288e109c97 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7319,22 +7319,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "packages/authx": { - "name": "@redis/authx", - "version": "5.0.0-next.5", - "extraneous": true, - "license": "MIT", - "dependencies": { - "@azure/msal-node": "^2.16.1" - }, - "devDependencies": {}, - "engines": { - 
"node": ">= 18" - }, - "peerDependencies": { - "@redis/client": "^5.0.0-next.5" - } - }, "packages/bloom": { "name": "@redis/bloom", "version": "5.8.2",