Skip to content

Commit fc8cc48

Browse files
feat: added configurable connection retries for DB and Redis readiness (#214)
* feat: add configurable connection retries for DB and Redis readiness - Introduced redis_isready.py to wait for Redis connection readiness. - Updated main.py to wait for both database and Redis to be ready before proceeding. - Added tests for Redis readiness functionality. Signed-off-by: reevebarreto <reevemarcbarreto@gmail.com> * Linting fixes and autoflake etc. Signed-off-by: Mihai Criveti <crivetimihai@gmail.com> * cli entrypoint Signed-off-by: Mihai Criveti <crivetimihai@gmail.com> --------- Signed-off-by: reevebarreto <reevemarcbarreto@gmail.com> Signed-off-by: Mihai Criveti <crivetimihai@gmail.com> Co-authored-by: Mihai Criveti <crivetimihai@gmail.com>
1 parent 3c7d3c6 commit fc8cc48

File tree

7 files changed

+462
-12
lines changed

7 files changed

+462
-12
lines changed

.env.example

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ DB_POOL_TIMEOUT=30
3838
# Recycle database connections after N seconds
3939
DB_POOL_RECYCLE=3600
4040

41+
# Maximum number of database connection attempts during cold start
42+
DB_MAX_RETRIES=3
43+
44+
# Delay between database connection attempts, in milliseconds
45+
DB_RETRY_INTERVAL_MS=2000
46+
4147
#####################################
4248
# Cache Backend
4349
#####################################
@@ -58,6 +64,12 @@ SESSION_TTL=3600
5864
# TTL for ephemeral messages (like completions) in seconds
5965
MESSAGE_TTL=600
6066

67+
# Maximum number of Redis connection attempts during cold start
68+
REDIS_MAX_RETRIES=3
69+
70+
# Delay between Redis connection attempts, in milliseconds
71+
REDIS_RETRY_INTERVAL_MS=2000
72+
6173
#####################################
6274
# Protocol Settings
6375
#####################################

README.md

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -970,20 +970,24 @@ You can get started by copying the provided [.env.example](.env.example) to `.en
970970

971971
### Database
972972

973-
| Setting | Description | Default | Options |
974-
| ----------------- | ------------------------------- | ------- | ------- |
975-
| `DB_POOL_SIZE` | SQLAlchemy connection pool size | `200` | int > 0 |
976-
| `DB_MAX_OVERFLOW` | Extra connections beyond pool | `10` | int ≥ 0 |
977-
| `DB_POOL_TIMEOUT` | Wait for connection (secs) | `30` | int > 0 |
978-
| `DB_POOL_RECYCLE` | Recycle connections (secs) | `3600` | int > 0 |
973+
| Setting | Description | Default | Options |
974+
| ----------------------- | ------------------------------- | ------- | ------- |
975+
| `DB_POOL_SIZE` | SQLAlchemy connection pool size | `200` | int > 0 |
976+
| `DB_MAX_OVERFLOW` | Extra connections beyond pool | `10` | int ≥ 0 |
977+
| `DB_POOL_TIMEOUT` | Wait for connection (secs) | `30` | int > 0 |
978+
| `DB_POOL_RECYCLE` | Recycle connections (secs) | `3600` | int > 0 |
979+
| `DB_MAX_RETRIES` | Max Retry Attempts | `3` | int > 0 |
980+
| `DB_RETRY_INTERVAL_MS` | Retry Interval (ms) | `2000` | int > 0 |
979981

980982
### Cache Backend
981983

982-
| Setting | Description | Default | Options |
983-
| -------------- | -------------------------- | -------- | ------------------------ |
984-
| `CACHE_TYPE` | Backend (`memory`/`redis`) | `memory` | `none`, `memory`,`redis` |
985-
| `REDIS_URL` | Redis connection URL | (none) | string or empty |
986-
| `CACHE_PREFIX` | Key prefix | `mcpgw:` | string |
984+
| Setting | Description | Default | Options |
985+
| ------------------------- | -------------------------- | -------- | ------------------------ |
986+
| `CACHE_TYPE` | Backend (`memory`/`redis`) | `memory` | `none`, `memory`,`redis` |
987+
| `REDIS_URL` | Redis connection URL | (none) | string or empty |
988+
| `CACHE_PREFIX` | Key prefix | `mcpgw:` | string |
989+
| `REDIS_MAX_RETRIES` | Max Retry Attempts | `3` | int > 0 |
990+
| `REDIS_RETRY_INTERVAL_MS` | Retry Interval (ms) | `2000` | int > 0 |
987991

988992
> 🧠 `none` disables caching entirely. Use `memory` for dev, `database` for persistence, or `redis` for distributed caching.
989993

mcpgateway/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,13 +179,17 @@ def _parse_federation_peers(cls, v):
179179
db_max_overflow: int = 10
180180
db_pool_timeout: int = 30
181181
db_pool_recycle: int = 3600
182+
db_max_retries: int = 3
183+
db_retry_interval_ms: int = 2000
182184

183185
# Cache
184186
cache_type: str = "database" # memory or redis or database
185187
redis_url: Optional[str] = "redis://localhost:6379/0"
186188
cache_prefix: str = "mcpgw:"
187189
session_ttl: int = 3600
188190
message_ttl: int = 600
191+
redis_max_retries: int = 3
192+
redis_retry_interval_ms: int = 2000
189193

190194
# streamable http transport
191195
use_stateful_sessions: bool = False # Set to False to use stateless sessions without event store

mcpgateway/main.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@
9797
ResourceContent,
9898
Root,
9999
)
100+
from mcpgateway.utils.db_isready import wait_for_db_ready
101+
from mcpgateway.utils.redis_isready import wait_for_redis_ready
100102
from mcpgateway.utils.verify_credentials import require_auth, require_auth_override
101103
from mcpgateway.validation.jsonrpc import (
102104
JSONRPCError,
@@ -138,6 +140,9 @@
138140
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
139141
)
140142

143+
# Wait for database to be ready before creating tables
144+
wait_for_db_ready(max_tries=int(settings.db_max_retries), interval=int(settings.db_retry_interval_ms) / 1000, sync=True) # Converting ms to s
145+
141146
# Create database tables
142147
Base.metadata.create_all(bind=engine)
143148

@@ -154,6 +159,9 @@
154159
# Initialize session manager for Streamable HTTP transport
155160
streamable_http_session = SessionManagerWrapper()
156161

162+
# Wait for redis to be ready
163+
if settings.cache_type == "redis":
164+
wait_for_redis_ready(redis_url=settings.redis_url, max_retries=int(settings.redis_max_retries), retry_interval_ms=int(settings.redis_retry_interval_ms), sync=True)
157165

158166
# Initialize session registry
159167
session_registry = SessionRegistry(

mcpgateway/utils/redis_isready.py

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""redis_isready - Wait until Redis is ready and accepting connections
4+
5+
Copyright 2025
6+
SPDX-License-Identifier: Apache-2.0
7+
Authors: Reeve Barreto, Mihai Criveti
8+
9+
This helper blocks until the given **Redis** server (defined by a connection URL)
10+
successfully responds to a `PING` command. It is intended to delay application startup until Redis is online.
11+
12+
It can be used either **synchronously** or **asynchronously**, and will retry
13+
connections with a configurable interval and number of attempts.
14+
15+
Exit codes when executed as a script
16+
-----------------------------------
17+
* ``0`` - Redis ready.
18+
* ``1`` - all attempts exhausted / timed-out.
19+
* ``2`` - :pypi:`redis` is **not** installed.
20+
* ``3`` - invalid parameter combination (``max_retries``/``retry_interval_ms``).
21+
22+
Features
23+
--------
24+
* Supports any valid Redis URL supported by :pypi:`redis`.
25+
* Retry settings are configurable via *environment variables*.
26+
* Works both **synchronously** (blocking) and **asynchronously**.
27+
28+
Environment variables
29+
---------------------
30+
These environment variables can be used to configure retry behavior and Redis connection.
31+
32+
+-----------------------------+-----------------------------------------------+-----------------------------+
33+
| Name | Description | Default |
34+
+=============================+===============================================+=============================+
35+
| ``REDIS_URL`` | Redis connection URL | ``redis://localhost:6379/0``|
36+
| ``REDIS_MAX_RETRIES`` | Maximum retry attempts before failing | ``3`` |
37+
| ``REDIS_RETRY_INTERVAL_MS`` | Delay between retries *(milliseconds)* | ``2000`` |
38+
| ``LOG_LEVEL`` | Log verbosity when not set via ``--log-level``| ``INFO`` |
39+
+-----------------------------+-----------------------------------------------+-----------------------------+
40+
41+
Usage examples
42+
--------------
43+
Shell ::
44+
45+
python redis_isready.py
46+
python redis_isready.py --redis-url "redis://localhost:6379/0" \
47+
--max-retries 5 --retry-interval-ms 500
48+
49+
Python ::
50+
51+
from redis_isready import wait_for_redis_ready
52+
53+
wait_for_redis_ready() # probe runs in an executor thread; the call still blocks and is not awaitable
54+
wait_for_redis_ready(sync=True) # synchronous / blocking
55+
"""
56+
57+
58+
# Standard
59+
import argparse
60+
import asyncio
61+
import logging
62+
import os
63+
import sys
64+
import time
65+
from typing import Any, Optional
66+
67+
# ---------------------------------------------------------------------------
68+
# Third-party imports - abort early if redis is missing
69+
# ---------------------------------------------------------------------------
70+
try:
    # Third-Party
    from redis import Redis
except ImportError:  # pragma: no cover - handled at runtime for the CLI
    # Nothing in this module works without the client library; status 2 is the
    # documented "redis not installed" exit code.
    sys.stderr.write("redis library not installed - aborting (pip install redis)\n")
    sys.exit(2)

# Defaults read from the environment once, at import time. They are used as the
# default keyword arguments of wait_for_redis_ready() and as CLI defaults.
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
REDIS_MAX_RETRIES = int(os.environ.get("REDIS_MAX_RETRIES", "3"))
REDIS_RETRY_INTERVAL_MS = int(os.environ.get("REDIS_RETRY_INTERVAL_MS", "2000"))

# Upper-cased so it can be looked up as an attribute of the logging module.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
83+
84+
85+
def _mask_redis_url(url: Any) -> str:
    """Return *url* with the password in its userinfo part replaced by ``***``.

    Used only for log output so that credentials embedded in a Redis URL
    (``redis://user:secret@host:6379/0``) do not end up in log files.

    Args:
        url: The Redis connection URL (anything that is not a ``str`` is
            simply converted with ``str()``).

    Returns:
        The URL unchanged when it carries no password, otherwise the URL with
        the password masked.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit, urlunsplit

    if not isinstance(url, str):
        return str(url)
    try:
        parts = urlsplit(url)
    except ValueError:
        # Never echo a URL we could not parse - it may still contain a secret.
        return "<unparseable redis url>"
    if parts.password is None:
        return url
    userinfo, _, hostinfo = parts.netloc.rpartition("@")
    username = userinfo.partition(":")[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))


def wait_for_redis_ready(
    *,
    redis_url: str = REDIS_URL,
    max_retries: int = REDIS_MAX_RETRIES,
    retry_interval_ms: int = REDIS_RETRY_INTERVAL_MS,
    logger: Optional[logging.Logger] = None,
    sync: bool = False,
) -> None:
    """
    Wait until a Redis server is ready to accept connections.

    This function attempts to connect to Redis and issue a `PING` command,
    retrying if the connection fails. The call always blocks until Redis
    answers or the attempts are exhausted; it returns ``None`` and is **not**
    awaitable. Intended for use during service startup to ensure Redis is
    reachable before proceeding.

    Args:
        redis_url : str
            Redis connection URL. Defaults to the value of the `REDIS_URL` environment variable.
        max_retries : int
            Maximum number of connection attempts before failing.
        retry_interval_ms : int
            Delay between retry attempts, in milliseconds.
        logger : logging.Logger, optional
            Logger instance to use. If not provided, a default logger is configured.
        sync : bool
            If True, runs the probe in the calling thread. If False (default),
            the probe is run in an executor thread driven by a private event
            loop; when an event loop is already running in the calling thread
            the probe falls back to running inline.

    Raises:
        RuntimeError: If the retry parameters are invalid, or if Redis does not
            respond successfully after all retry attempts (the last connection
            error is attached as ``__cause__``).
    """
    log = logger or logging.getLogger("redis_isready")
    if not log.handlers:  # basicConfig is a no-op once the root logger has handlers
        logging.basicConfig(
            level=getattr(logging, LOG_LEVEL, logging.INFO),
            format="%(asctime)s [%(levelname)s] %(message)s",
            datefmt="%Y-%m-%dT%H:%M:%S",
        )

    if max_retries < 1 or retry_interval_ms <= 0:
        raise RuntimeError("Invalid max_retries or retry_interval_ms values")

    # The URL may embed a password - mask it before it reaches the logs.
    log.info(f"Probing Redis at {_mask_redis_url(redis_url)} (interval={retry_interval_ms}ms, max_retries={max_retries})")

    def _probe(*_: Any) -> None:
        """
        Inner synchronous probe running in either the current or a worker thread.

        Args:
            *_: Ignored arguments (for compatibility with run_in_executor).

        Returns:
            None - the function exits successfully once Redis answers.

        Raises:
            RuntimeError: Raised after exhausting ``max_retries`` attempts.
        """
        redis_client = Redis.from_url(redis_url)
        last_error: Optional[BaseException] = None
        for attempt in range(1, max_retries + 1):
            try:
                redis_client.ping()
                log.info(f"Redis ready (attempt {attempt})")
                return
            except Exception as exc:  # any failure counts as "not ready yet"
                last_error = exc
                log.debug(f"Attempt {attempt}/{max_retries} failed ({exc}) - retrying in {retry_interval_ms} ms")
                if attempt < max_retries:  # Don't sleep on the last attempt
                    time.sleep(retry_interval_ms / 1000.0)
        # Chain the last failure so callers can see *why* Redis was unreachable.
        raise RuntimeError(f"Redis not ready after {max_retries} attempts") from last_error

    if sync:
        _probe()
        return

    # asyncio.get_event_loop() is deprecated when no loop is running, and
    # run_until_complete() cannot be nested inside a running loop, so detect
    # which situation we are in first.
    try:
        asyncio.get_running_loop()
        loop_running = True
    except RuntimeError:
        loop_running = False

    if loop_running:
        # Called from async code: a nested run_until_complete() would raise
        # "This event loop is already running". Probe inline instead; this
        # blocks the caller, just as the executor path does.
        _probe()
        return

    # No loop in this thread: use a private loop and close it afterwards so no
    # global event-loop state is left behind.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(loop.run_in_executor(None, _probe))
    finally:
        loop.close()
159+
160+
161+
# ---------------------------------------------------------------------------
162+
# CLI helpers
163+
# ---------------------------------------------------------------------------
164+
165+
166+
def _parse_cli() -> argparse.Namespace:
    """Build the *redis_isready* argument parser and parse the process arguments.

    Returns:
        The :class:`argparse.Namespace` produced by ``parse_args()``, with the
        attributes ``redis_url``, ``max_retries``, ``retry_interval_ms`` and
        ``log_level``.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Wait until Redis is ready and accepting connections.",
    )

    # (flag, add_argument keyword arguments) - registered in this order so the
    # generated --help output lists them in the same sequence.
    option_table = (
        ("--redis-url", {"default": REDIS_URL, "help": "Redis connection URL (env REDIS_URL)"}),
        ("--max-retries", {"type": int, "default": REDIS_MAX_RETRIES, "help": "Maximum connection attempts"}),
        ("--retry-interval-ms", {"type": int, "default": REDIS_RETRY_INTERVAL_MS, "help": "Delay between attempts in milliseconds"}),
        ("--log-level", {"default": LOG_LEVEL, "help": "Logging level (DEBUG, INFO, …)"}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
186+
187+
188+
def main() -> None:  # pragma: no cover
    """CLI entry-point.

    * Parses command-line options.
    * Applies ``--log-level`` to the *redis_isready* logger **before** the first
      message is emitted.
    * Validates the retry parameters up front so an invalid combination is
      reported with its own exit status instead of being mistaken for an
      unreachable server.
    * Delegates the actual probing to :func:`wait_for_redis_ready`.
    * Exits with:

      * ``0`` - Redis became ready.
      * ``1`` - connection attempts exhausted.
      * ``2`` - redis library missing (raised at import time, not here).
      * ``3`` - invalid parameter combination.
    """
    cli_args = _parse_cli()

    log = logging.getLogger("redis_isready")
    log.setLevel(cli_args.log_level.upper())

    # wait_for_redis_ready() signals bad parameters with the same RuntimeError
    # type it uses for "Redis unavailable", so check them here to be able to
    # return the documented exit code 3 rather than 1.
    if cli_args.max_retries < 1 or cli_args.retry_interval_ms <= 0:
        log.error("Invalid max_retries or retry_interval_ms values")
        sys.exit(3)

    try:
        wait_for_redis_ready(
            redis_url=cli_args.redis_url,
            max_retries=cli_args.max_retries,
            retry_interval_ms=cli_args.retry_interval_ms,
            sync=True,
            logger=log,
        )
    except RuntimeError as exc:
        log.error(f"Redis unavailable: {exc}")
        sys.exit(1)

    sys.exit(0)
220+
221+
222+
if __name__ == "__main__": # pragma: no cover
223+
main()

mcpgateway/utils/services_auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
"""
2+
"""mcpgateway.utils.services_auth - Authentication utilities for MCP Gateway
33
44
Copyright 2025
55
SPDX-License-Identifier: Apache-2.0

0 commit comments

Comments
 (0)