implement Redis connection pooling #5942

Merged · 6 commits · Sep 6, 2024
Changes from all commits
5 changes: 5 additions & 0 deletions .changesets/feat_geal_implement_redis_connection_pooling.md
@@ -0,0 +1,5 @@
+### Support Redis connection pooling ([PR #5942](https://github.com/apollographql/router/pull/5942))
+
+This implements Redis connection pooling for the APQ, query planner, and entity cache uses of Redis. Pooling can improve performance when there is contention on the Redis connection or latency in Redis calls.
+
+By [@Geal](https://github.com/Geal) in https://github.com/apollographql/router/pull/5942
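As an illustrative sketch (not part of the changeset), enabling the new option for the query planner cache looks like this in router YAML; the URL is a placeholder, and the same `pool_size` knob is available on the APQ and entity cache Redis blocks:

```yaml
supergraph:
  query_planning:
    cache:
      redis:
        urls: ["redis://127.0.0.1:6379"]
        pool_size: 4 # optional, defaults to 1
```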
132 changes: 63 additions & 69 deletions apollo-router/src/cache/redis.rs
@@ -12,6 +12,7 @@ use fred::prelude::KeysInterface;
 use fred::prelude::RedisClient;
 use fred::prelude::RedisError;
 use fred::prelude::RedisErrorKind;
+use fred::prelude::RedisPool;
 use fred::types::ClusterRouting;
 use fred::types::Expiration;
 use fred::types::FromRedis;
@@ -52,7 +53,7 @@ where

 #[derive(Clone)]
 pub(crate) struct RedisCacheStorage {
-    inner: Arc<RedisClient>,
+    inner: Arc<RedisPool>,
     namespace: Option<Arc<String>>,
     pub(crate) ttl: Option<Duration>,
     is_cluster: bool,
@@ -168,47 +169,16 @@ impl RedisCacheStorage {
             });
         }

-        let client = RedisClient::new(
+        Self::create_client(
             client_config,
-            Some(PerformanceConfig {
-                default_command_timeout: config.timeout.unwrap_or(Duration::from_millis(500)),
-                ..Default::default()
-            }),
-            None,
-            Some(ReconnectPolicy::new_exponential(0, 1, 2000, 5)),
-        );
-        let _handle = client.connect();
-
-        // spawn tasks that listen for connection close or reconnect events
-        let mut error_rx = client.error_rx();
-        let mut reconnect_rx = client.reconnect_rx();
-
-        tokio::spawn(async move {
-            while let Ok(error) = error_rx.recv().await {
-                tracing::error!("Client disconnected with error: {:?}", error);
-            }
-        });
-        tokio::spawn(async move {
-            while reconnect_rx.recv().await.is_ok() {
-                tracing::info!("Redis client reconnected.");
-            }
-        });
-
-        // a TLS connection to a TCP Redis could hang, so we add a timeout
-        tokio::time::timeout(Duration::from_secs(5), client.wait_for_connect())
-            .await
-            .map_err(|_| {
-                RedisError::new(RedisErrorKind::Timeout, "timeout connecting to Redis")
-            })??;
-
-        tracing::trace!("redis connection established");
-        Ok(Self {
-            inner: Arc::new(client),
-            namespace: config.namespace.map(Arc::new),
-            ttl: config.ttl,
-            is_cluster,
-            reset_ttl: config.reset_ttl,
-        })
+            config.timeout.unwrap_or(Duration::from_millis(500)),
+            config.pool_size as usize,
+            config.namespace,
+            config.ttl,
+            config.reset_ttl,
+            is_cluster,
+        )
+        .await
     }

#[cfg(test)]
@@ -218,46 +188,70 @@ impl RedisCacheStorage {
             ..Default::default()
         };

-        let client = RedisClient::new(
+        Self::create_client(
             client_config,
+            Duration::from_millis(2),
+            1,
+            None,
+            None,
+            false,
+            false,
+        )
+        .await
+    }
+
+    async fn create_client(
+        client_config: RedisConfig,
+        timeout: Duration,
+        pool_size: usize,
+        namespace: Option<String>,
+        ttl: Option<Duration>,
+        reset_ttl: bool,
+        is_cluster: bool,
+    ) -> Result<Self, BoxError> {
+        let pooled_client = RedisPool::new(
+            client_config,
             Some(PerformanceConfig {
-                default_command_timeout: Duration::from_millis(2),
+                default_command_timeout: timeout,
                 ..Default::default()
             }),
             None,
             Some(ReconnectPolicy::new_exponential(0, 1, 2000, 5)),
-        );
-        let _handle = client.connect();
-
-        // spawn tasks that listen for connection close or reconnect events
-        let mut error_rx = client.error_rx();
-        let mut reconnect_rx = client.reconnect_rx();
-
-        tokio::spawn(async move {
-            while let Ok(error) = error_rx.recv().await {
-                tracing::error!("Client disconnected with error: {:?}", error);
-            }
-        });
-        tokio::spawn(async move {
-            while reconnect_rx.recv().await.is_ok() {
-                tracing::info!("Redis client reconnected.");
-            }
-        });
+            pool_size,
+        )?;
+        let _handle = pooled_client.connect();
+
+        for client in pooled_client.clients() {
+            // spawn tasks that listen for connection close or reconnect events
+            let mut error_rx = client.error_rx();
+            let mut reconnect_rx = client.reconnect_rx();
+
+            tokio::spawn(async move {
+                while let Ok(error) = error_rx.recv().await {
+                    tracing::error!("Client disconnected with error: {:?}", error);
+                }
+            });
+            tokio::spawn(async move {
+                while reconnect_rx.recv().await.is_ok() {
+                    tracing::info!("Redis client reconnected.");
+                }
+            });
+        }

         // a TLS connection to a TCP Redis could hang, so we add a timeout
-        tokio::time::timeout(Duration::from_secs(5), client.wait_for_connect())
+        tokio::time::timeout(Duration::from_secs(5), pooled_client.wait_for_connect())
             .await
             .map_err(|_| {
                 RedisError::new(RedisErrorKind::Timeout, "timeout connecting to Redis")
             })??;

         tracing::trace!("redis connection established");
         Ok(Self {
-            inner: Arc::new(client),
-            ttl: None,
-            namespace: None,
-            is_cluster: false,
-            reset_ttl: false,
+            inner: Arc::new(pooled_client),
+            namespace: namespace.map(Arc::new),
+            ttl,
+            is_cluster,
+            reset_ttl,
         })
     }
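As background for the hunk above, here is a minimal, self-contained sketch of the fred pool pattern the router now relies on. It is an illustration under assumptions (fred 9.x API; the URL, pool size, and key names are placeholders, not taken from the router):

```rust
use fred::prelude::*;

async fn pool_demo() -> Result<(), RedisError> {
    let config = RedisConfig::from_url("redis://127.0.0.1:6379")?;
    // The last argument is the pool size, i.e. the number of underlying connections.
    let pool = RedisPool::new(config, None, None, None, 4)?;
    let _handles = pool.connect();
    pool.wait_for_connect().await?;

    // The pool implements the same command traits as a single client and
    // delegates each command to `next()`, which picks clients round-robin.
    let _: () = pool.set("demo_key", "value", None, None, false).await?;
    let value: Option<String> = pool.get("demo_key").await?;
    println!("{value:?}");

    Ok(())
}
```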

@@ -370,7 +364,7 @@ impl RedisCacheStorage {
         key: RedisKey<K>,
     ) -> Option<RedisValue<V>> {
         if self.reset_ttl && self.ttl.is_some() {
-            let pipeline: fred::clients::Pipeline<RedisClient> = self.inner.pipeline();
+            let pipeline: fred::clients::Pipeline<RedisClient> = self.inner.next().pipeline();
             let key = self.make_key(key);
             let res = pipeline
                 .get::<fred::types::RedisValue, _>(&key)
@@ -541,7 +535,7 @@ impl RedisCacheStorage {
             None => self.inner.mset(data.to_owned()).await,
             Some(ttl) => {
                 let expiration = Some(Expiration::EX(ttl.as_secs() as i64));
-                let pipeline = self.inner.pipeline();
+                let pipeline = self.inner.next().pipeline();

                 for (key, value) in data {
                     let _ = pipeline
@@ -593,9 +587,9 @@ impl RedisCacheStorage {
         count: Option<u32>,
     ) -> Pin<Box<dyn Stream<Item = Result<ScanResult, RedisError>> + Send>> {
         if self.is_cluster {
-            Box::pin(self.inner.scan_cluster(pattern, count, None))
+            Box::pin(self.inner.next().scan_cluster(pattern, count, None))
         } else {
-            Box::pin(self.inner.scan(pattern, count, None))
+            Box::pin(self.inner.next().scan(pattern, count, None))
         }
     }
 }
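The `next()` calls in these hunks matter because a fred pipeline is bound to one concrete client, so a connection is taken from the pool before building it. A rough sketch of the get-plus-refresh pattern used for `reset_ttl` (assuming fred 9.x; the function name and TTL value are illustrative):

```rust
use fred::prelude::*;

async fn get_and_refresh_ttl(
    pool: &RedisPool,
    key: &str,
) -> Result<Option<String>, RedisError> {
    // Pick one client from the pool; the whole pipeline runs on its connection.
    let pipeline = pool.next().pipeline();

    // Commands on a pipeline are queued in memory and return immediately.
    let _: () = pipeline.get(key).await?;
    let _: () = pipeline.expire(key, 60).await?;

    // `all()` sends the queued commands and returns every response in order.
    let (value, _expire_ok): (Option<String>, i64) = pipeline.all().await?;
    Ok(value)
}
```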
17 changes: 17 additions & 0 deletions apollo-router/src/configuration/mod.rs
@@ -1010,13 +1010,21 @@ pub(crate) struct QueryPlanRedisCache {
     #[serde(default = "default_reset_ttl")]
     /// When a TTL is set on a key, reset it when reading the data from that key
     pub(crate) reset_ttl: bool,
+
+    #[serde(default = "default_query_planner_cache_pool_size")]
+    /// The size of the Redis connection pool
+    pub(crate) pool_size: u32,
 }

 fn default_query_plan_cache_ttl() -> Duration {
     // Default TTL set to 30 days
     Duration::from_secs(86400 * 30)
 }
+
+fn default_query_planner_cache_pool_size() -> u32 {
+    1
+}

 /// Cache configuration
 #[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
 #[serde(deny_unknown_fields, default)]
@@ -1088,12 +1096,20 @@ pub(crate) struct RedisCache {
     #[serde(default = "default_reset_ttl")]
     /// When a TTL is set on a key, reset it when reading the data from that key
     pub(crate) reset_ttl: bool,
+
+    #[serde(default = "default_pool_size")]
+    /// The size of the Redis connection pool
+    pub(crate) pool_size: u32,
 }

 fn default_required_to_start() -> bool {
     false
 }
+
+fn default_pool_size() -> u32 {
+    1
+}

 impl From<QueryPlanRedisCache> for RedisCache {
     fn from(value: QueryPlanRedisCache) -> Self {
         RedisCache {
@@ -1106,6 +1122,7 @@ impl From<QueryPlanRedisCache> for RedisCache {
             tls: value.tls,
             required_to_start: value.required_to_start,
             reset_ttl: value.reset_ttl,
+            pool_size: value.pool_size,
         }
     }
 }
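As a side note, the `#[serde(default = ...)]` attributes above are what keep existing configuration files valid: a config that never mentions `pool_size` still deserializes, with the pool size falling back to 1. A minimal sketch using a hypothetical mirror of the struct (not the router's actual types):

```rust
use serde::Deserialize;

fn default_pool_size() -> u32 {
    1
}

// Hypothetical, trimmed-down mirror of the RedisCache config struct.
#[derive(Debug, Deserialize)]
struct DemoRedisCache {
    urls: Vec<String>,
    #[serde(default = "default_pool_size")]
    pool_size: u32,
}

fn main() {
    // `pool_size` is absent from the input, so the serde default applies.
    let cache: DemoRedisCache =
        serde_yaml::from_str(r#"urls: ["redis://127.0.0.1:6379"]"#).unwrap();
    assert_eq!(cache.pool_size, 1);
}
```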
@@ -4333,6 +4333,13 @@ expression: "&schema"
           "nullable": true,
           "type": "string"
         },
+        "pool_size": {
+          "default": 1,
+          "description": "The size of the Redis connection pool",
+          "format": "uint32",
+          "minimum": 0.0,
+          "type": "integer"
+        },
         "required_to_start": {
           "default": false,
           "description": "Prevents the router from starting if it cannot connect to Redis",
@@ -4543,6 +4550,13 @@ expression: "&schema"
           "nullable": true,
           "type": "string"
         },
+        "pool_size": {
+          "default": 1,
+          "description": "The size of the Redis connection pool",
+          "format": "uint32",
+          "minimum": 0.0,
+          "type": "integer"
+        },
         "required_to_start": {
           "default": false,
           "description": "Prevents the router from starting if it cannot connect to Redis",
3 changes: 2 additions & 1 deletion apollo-router/tests/integration/redis.rs
@@ -369,7 +369,8 @@ async fn entity_cache() -> Result<(), BoxError> {
                 "enabled": false,
                 "redis": {
                     "urls": ["redis://127.0.0.1:6379"],
-                    "ttl": "2s"
+                    "ttl": "2s",
+                    "pool_size": 3
                 },
             },
             "subgraphs": {
7 changes: 6 additions & 1 deletion docs/source/configuration/distributed-caching.mdx
@@ -143,6 +143,7 @@ supergraph:
       #tls:
       required_to_start: false # Optional, defaults to false
       reset_ttl: true # Optional, defaults to true
+      pool_size: 4 # Optional, defaults to 1
 ```

 #### Timeout
@@ -168,4 +169,8 @@ When active, the `required_to_start` option will prevent the router from starting

 ### Reset TTL

-When this option is active, accessing a cache entry in Redis will reset its expiration.
+When this option is active, accessing a cache entry in Redis will reset its expiration.
+
+### Pool size
+
+The `pool_size` option defines the number of connections to Redis that the router opens. By default, the router opens a single connection. If there is heavy traffic between the router and Redis, or noticeable latency in those requests, increasing the pool size can reduce that latency.
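The same option is available on the other Redis cache blocks; as an illustrative sketch for distributed APQ caching (the URL is a placeholder):

```yaml
apq:
  router:
    cache:
      redis:
        urls: ["redis://127.0.0.1:6379"]
        pool_size: 4 # Optional, defaults to 1
```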