Skip to content

Commit

Permalink
cherry-pick #19433 to 1.34 (#19473)
Browse files Browse the repository at this point in the history
## Description 

cherry pick of #19433
  • Loading branch information
johnjmartin authored Sep 20, 2024
1 parent 003ac15 commit c23f605
Showing 1 changed file with 51 additions and 1 deletion.
52 changes: 51 additions & 1 deletion crates/sui-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2038,7 +2038,7 @@ pub async fn build_http_server(

if config.enable_experimental_rest_api {
let mut rest_service = sui_rest_api::RestService::new(
Arc::new(RestReadStore::new(state, store)),
Arc::new(RestReadStore::new(state.clone(), store)),
software_version,
);

Expand All @@ -2050,6 +2050,11 @@ pub async fn build_http_server(

router = router.merge(rest_service.into_router());
}
// TODO: Remove this health check when experimental REST API becomes default
// This is a copy of the health check in crates/sui-rest-api/src/health.rs
router = router
.route("/health", axum::routing::get(health_check_handler))
.route_layer(axum::Extension(state));

let listener = tokio::net::TcpListener::bind(&config.json_rpc_address)
.await
Expand All @@ -2072,6 +2077,51 @@ pub async fn build_http_server(
Ok(Some(handle))
}

/// Query-string parameters accepted by the `/health` endpoint.
///
/// Extracted by `health_check_handler` via `axum::extract::Query`.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct Threshold {
/// Maximum tolerated age, in seconds, of the highest executed checkpoint.
/// When absent, the handler skips the checkpoint-lag check entirely.
pub threshold_seconds: Option<u32>,
}

/// Handler for `GET /health`.
///
/// Without a `threshold_seconds` query parameter the endpoint is a plain
/// liveness probe: if the server can answer, it replies `200 OK` / `"up"`.
///
/// With `threshold_seconds` it additionally checks freshness: the timestamp of
/// the highest executed checkpoint must be no older than `threshold_seconds`
/// relative to the local wall clock.
///
/// # Returns
///
/// * `200 OK` with body `"up"` when healthy.
/// * `503 Service Unavailable` with body `"down"` when the highest executed
///   checkpoint is missing, cannot be read, or is older than the threshold.
async fn health_check_handler(
    axum::extract::Query(Threshold { threshold_seconds }): axum::extract::Query<Threshold>,
    axum::Extension(state): axum::Extension<Arc<AuthorityState>>,
) -> impl axum::response::IntoResponse {
    if let Some(threshold_seconds) = threshold_seconds {
        // Attempt to get the latest checkpoint
        let summary = match state
            .get_checkpoint_store()
            .get_highest_executed_checkpoint()
        {
            Ok(Some(summary)) => summary,
            Ok(None) => {
                warn!("Highest executed checkpoint not found");
                return (axum::http::StatusCode::SERVICE_UNAVAILABLE, "down");
            }
            Err(err) => {
                warn!("Failed to retrieve highest executed checkpoint: {:?}", err);
                return (axum::http::StatusCode::SERVICE_UNAVAILABLE, "down");
            }
        };

        let latest_chain_time = summary.timestamp();
        // `u64::from` is a lossless widening; unlike `as` it cannot silently
        // truncate if the field type ever changes.
        let max_lag = Duration::from_secs(u64::from(threshold_seconds));

        // `threshold_seconds` comes straight from the request, so use
        // `checked_sub` instead of the panicking `SystemTime - Duration`.
        // If the cutoff is not representable on this platform, it lies further
        // in the past than any representable time, so no checkpoint can be
        // older than it and the lag check passes.
        if let Some(threshold) = std::time::SystemTime::now().checked_sub(max_lag) {
            // Check if the latest checkpoint is within the threshold
            if latest_chain_time < threshold {
                warn!(
                    ?latest_chain_time,
                    ?threshold,
                    "failing healthcheck due to checkpoint lag"
                );
                return (axum::http::StatusCode::SERVICE_UNAVAILABLE, "down");
            }
        }
    }
    // if health endpoint is responding and no threshold is given, respond success
    (axum::http::StatusCode::OK, "up")
}

#[cfg(not(test))]
fn max_tx_per_checkpoint(protocol_config: &ProtocolConfig) -> usize {
protocol_config.max_transactions_per_checkpoint() as usize
Expand Down

0 comments on commit c23f605

Please sign in to comment.