-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(rest): add health API for readiness and liveness probes
Signed-off-by: Niladri Halder <niladri.halder26@gmail.com>
- Loading branch information
Showing
5 changed files
with
141 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
use crate::v0::core_grpc; | ||
use grpc::operations::node::traits::NodeOperations; | ||
use std::{ | ||
sync::RwLock, | ||
time::{Duration, Instant}, | ||
}; | ||
|
||
/// This is a type to cache the liveness of the agent-core service. | ||
/// This is meant to wrapped inside an Arc and used across threads. | ||
pub struct CachedCoreState { | ||
state: RwLock<ServerState>, | ||
cache_duration: Duration, | ||
} | ||
|
||
/// A single liveness observation: the observed value and the moment it was
/// last refreshed.
struct ServerState {
    /// Whether the service was reported live at the last refresh.
    is_live: bool,
    /// When `is_live` was last refreshed.
    last_updated: Instant,
}
|
||
impl CachedCoreState { | ||
/// Create a new cache for serving readiness health checks based on agent-core health. | ||
pub async fn new(cache_duration: Duration) -> Self { | ||
let agent_core_is_live = core_grpc().node().probe(None).await.unwrap_or(false); | ||
|
||
CachedCoreState { | ||
state: RwLock::new(ServerState { | ||
is_live: agent_core_is_live, | ||
last_updated: Instant::now(), | ||
}), | ||
cache_duration, | ||
} | ||
} | ||
|
||
/// Get the cached state of the agent-core service, or assume it's unavailable if something | ||
/// went wrong. | ||
pub async fn get_or_assume_unavailable(&self) -> bool { | ||
let should_update = { | ||
let state = self.state.read().unwrap(); | ||
state.last_updated.elapsed() >= self.cache_duration | ||
}; | ||
|
||
if should_update { | ||
self.update_or_assume_unavailable().await; | ||
} | ||
|
||
self.state.read().unwrap().is_live | ||
} | ||
|
||
/// Update the state of the agent-core service, or assume it's unavailable if something | ||
/// went wrong. | ||
pub async fn update_or_assume_unavailable(&self) { | ||
let new_value = core_grpc().node().probe(None).await.unwrap_or(false); | ||
let mut state = self.state.write().unwrap(); | ||
state.is_live = new_value; | ||
state.last_updated = Instant::now(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
use crate::CachedCoreState; | ||
use actix_web::{get, web::Data, HttpResponse, Responder}; | ||
|
||
/// Liveness probe check. Failure will result in Pod restart. 200 on success. | ||
#[get("/live")] | ||
async fn liveness(_cached_core_state: Data<CachedCoreState>) -> impl Responder { | ||
HttpResponse::Ok() | ||
.content_type("text/plain; charset=utf-8") | ||
.insert_header(("X-Content-Type-Options", "nosniff")) | ||
.body("live") | ||
} | ||
|
||
/// Readiness probe check. Failure will result in removal of Container from Kubernetes service | ||
/// target pool. 200 on success, 503 on failure. | ||
#[get("/ready")] | ||
async fn readiness(cached_core_state: Data<CachedCoreState>) -> HttpResponse { | ||
if cached_core_state.get_or_assume_unavailable().await { | ||
return HttpResponse::Ok() | ||
.content_type("text/plain; charset=utf-8") | ||
.insert_header(("X-Content-Type-Options", "nosniff")) | ||
.body("ready"); | ||
} | ||
|
||
HttpResponse::ServiceUnavailable() | ||
.content_type("text/plain; charset=utf-8") | ||
.insert_header(("X-Content-Type-Options", "nosniff")) | ||
.body("not ready") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
/// Has tools to collect the liveness state of the agent-core service. | ||
pub mod core_state; | ||
/// Actix request handlers for health checks. | ||
pub mod handlers; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters