diff --git a/ee/tabby-webserver/src/rate_limit.rs b/ee/tabby-webserver/src/rate_limit.rs index c85bd5f7c8c3..16abdda8a172 100644 --- a/ee/tabby-webserver/src/rate_limit.rs +++ b/ee/tabby-webserver/src/rate_limit.rs @@ -20,7 +20,12 @@ impl Default for UserRateLimiter { } impl UserRateLimiter { - pub async fn is_allowed(&self, user_id: &str) -> bool { + pub async fn is_allowed(&self, uri: &axum::http::Uri, user_id: &str) -> bool { + // Do not limit health check requests. + if uri.path().ends_with("/v1/health") || uri.path().ends_with("/v1beta/health") { + return true; + } + let mut rate_limiters = self.rate_limiters.lock().await; let rate_limiter = rate_limiters.cache_get_or_set_with(user_id.to_string(), || { // Create a new rate limiter for this user. @@ -50,12 +55,18 @@ mod tests { let user_id = "test_user"; let rate_limiter = UserRateLimiter::default(); + let uri: axum::http::Uri = "/v1/completions".parse().unwrap(); + let healthcheck_uri: axum::http::Uri = "/v1/health".parse().unwrap(); + // Test that the first `USER_REQUEST_LIMIT_PER_MINUTE` requests are allowed for _ in 0..USER_REQUEST_LIMIT_PER_MINUTE { - assert!(rate_limiter.is_allowed(user_id).await); + assert!(rate_limiter.is_allowed(&uri, user_id).await); } // Test that the 201st request is not allowed - assert!(!rate_limiter.is_allowed(user_id).await); + assert!(!rate_limiter.is_allowed(&uri, user_id).await); + + // Test that health check requests are not limited + assert!(rate_limiter.is_allowed(&healthcheck_uri, user_id).await); } } diff --git a/ee/tabby-webserver/src/service/mod.rs b/ee/tabby-webserver/src/service/mod.rs index eb3300e6310a..f67734392f57 100644 --- a/ee/tabby-webserver/src/service/mod.rs +++ b/ee/tabby-webserver/src/service/mod.rs @@ -230,7 +230,11 @@ impl WorkerService for ServerContext { if let Some(user) = user { // Apply rate limiting when `user` is not none. - if !self.user_rate_limiter.is_allowed(&user).await { + if !self + .user_rate_limiter + .is_allowed(request.uri(), &user) + .await + { return axum::response::Response::builder() .status(StatusCode::TOO_MANY_REQUESTS) .body(Body::empty())