diff --git a/src/query/service/src/servers/http/http_services.rs b/src/query/service/src/servers/http/http_services.rs
index b7b83e3e6be55..cdef6bbe53c33 100644
--- a/src/query/service/src/servers/http/http_services.rs
+++ b/src/query/service/src/servers/http/http_services.rs
@@ -37,6 +37,7 @@ use poem::Route;
 use super::v1::upload_to_stage;
 use crate::auth::AuthMgr;
 use crate::servers::http::middleware::HTTPSessionMiddleware;
+use crate::servers::http::middleware::PanicHandler;
 use crate::servers::http::v1::clickhouse_router;
 use crate::servers::http::v1::list_suggestions;
 use crate::servers::http::v1::query_route;
@@ -122,7 +123,7 @@ impl HttpHandler {
                 .nest("/health", ep_health),
         };
         ep.with(NormalizePath::new(TrailingSlash::Trim))
-            .with(CatchPanic::new())
+            .with(CatchPanic::new().with_handler(PanicHandler::new()))
             .boxed()
     }
 
diff --git a/src/query/service/src/servers/http/metrics.rs b/src/query/service/src/servers/http/metrics.rs
index f182807aaab0a..4bb902e2e5f68 100644
--- a/src/query/service/src/servers/http/metrics.rs
+++ b/src/query/service/src/servers/http/metrics.rs
@@ -23,3 +23,12 @@ pub fn metrics_incr_http_slow_request_count(method: String, api: String, status:
     let labels = [("method", method), ("api", api), ("status", status)];
     counter!("query_http_slow_requests_count", 1, &labels);
 }
+
+pub fn metrics_incr_http_response_errors_count(err: String, code: u16) {
+    let labels = [("err", err), ("code", code.to_string())];
+    counter!("query_http_response_errors_count", 1, &labels);
+}
+
+pub fn metrics_incr_http_response_panics_count() {
+    counter!("query_http_response_panics_count", 1);
+}
diff --git a/src/query/service/src/servers/http/middleware.rs b/src/query/service/src/servers/http/middleware.rs
index 93738bbe8657e..ff96692d59a50 100644
--- a/src/query/service/src/servers/http/middleware.rs
+++ b/src/query/service/src/servers/http/middleware.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Instant;
@@ -41,6 +42,7 @@ use super::v1::HttpQueryContext;
 use crate::auth::AuthMgr;
 use crate::auth::Credential;
 use crate::servers::http::metrics::metrics_incr_http_request_count;
+use crate::servers::http::metrics::metrics_incr_http_response_panics_count;
 use crate::servers::http::metrics::metrics_incr_http_slow_request_count;
 use crate::servers::HttpHandlerKind;
 use crate::sessions::SessionManager;
@@ -314,3 +316,21 @@ impl Endpoint for MetricsMiddlewareEndpoint {
         Ok(resp)
     }
 }
+
+#[derive(Clone, Debug)]
+pub(crate) struct PanicHandler {}
+
+impl PanicHandler {
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+impl poem::middleware::PanicHandler for PanicHandler {
+    type Response = (StatusCode, &'static str);
+
+    fn get_response(&self, _err: Box<dyn Any + Send + 'static>) -> Self::Response {
+        metrics_incr_http_response_panics_count();
+        (StatusCode::INTERNAL_SERVER_ERROR, "internal server error")
+    }
+}
diff --git a/src/query/service/src/servers/http/v1/http_query_handlers.rs b/src/query/service/src/servers/http/v1/http_query_handlers.rs
index 495a1eb78a189..535577b52751a 100644
--- a/src/query/service/src/servers/http/v1/http_query_handlers.rs
+++ b/src/query/service/src/servers/http/v1/http_query_handlers.rs
@@ -33,6 +33,7 @@ use serde_json::Value as JsonValue;
 use super::query::ExecuteStateKind;
 use super::query::HttpQueryRequest;
 use super::query::HttpQueryResponseInternal;
+use crate::servers::http::metrics::metrics_incr_http_response_errors_count;
 use crate::servers::http::middleware::MetricsMiddleware;
 use crate::servers::http::v1::query::Progresses;
 use crate::servers::http::v1::HttpQueryContext;
@@ -156,6 +157,10 @@ impl QueryResponse {
             }
         };
 
+        if let Some(err) = &r.state.error {
+            metrics_incr_http_response_errors_count(err.name(), err.code());
+        }
+
         let schema = data.schema().clone();
         let session_id = r.session_id.clone();
         let stats = QueryStats {
@@ -163,6 +168,7 @@ impl QueryResponse {
             running_time_ms: state.running_time_ms,
         };
         let rows = data.data.len();
+
         Json(QueryResponse {
             data: data.into(),
             state: state.state,
@@ -184,6 +190,7 @@ impl QueryResponse {
     }
 
     pub(crate) fn fail_to_start_sql(err: &ErrorCode) -> impl IntoResponse {
+        metrics_incr_http_response_errors_count(err.name(), err.code());
         Json(QueryResponse {
             id: "".to_string(),
             stats: QueryStats::default(),