From 214e4125ba0600d6f23e61facefcda9606786d7f Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Mon, 21 Oct 2024 11:32:57 -0700
Subject: [PATCH] feat(prometheus.py): emit time_to_first_token metric on prometheus

Closes https://github.com/BerriAI/litellm/issues/6334
---
 docs/my-website/docs/proxy/prometheus.md |  5 ++--
 litellm/integrations/prometheus.py       | 29 ++++++++++++++++++++++++
 litellm/proxy/_new_secret_config.yaml    |  9 +++++---
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index ef3a7b940df7..c72a66fb6bfc 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -134,8 +134,9 @@ Use this for LLM API Error monitoring and tracking remaining rate limits and tok
 
 | Metric Name | Description |
 |----------------------|--------------------------------------|
-| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels `litellm_call_id`, `model`, `user_api_key`, `user_api_key_alias`, `user_api_team`, `user_api_team_alias` |
-| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model`, `user_api_key`, `user_api_key_alias`, `user_api_team`, `user_api_team_alias` |
+| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` |
+| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` |
+| `litellm_llm_api_time_to_first_token_metric` | Time to first token (seconds) for the LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` |
 
 
 ## Virtual Key - Budget, Rate Limit Metrics
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 370ab1575511..39a32ca55d50 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -97,6 +97,19 @@ def __init__(
             buckets=LATENCY_BUCKETS,
         )
 
+        self.litellm_llm_api_time_to_first_token_metric = Histogram(
+            "litellm_llm_api_time_to_first_token_metric",
+            "Time to first token for a model's LLM API call",
+            labelnames=[
+                "model",
+                "hashed_api_key",
+                "api_key_alias",
+                "team",
+                "team_alias",
+            ],
+            buckets=LATENCY_BUCKETS,
+        )
+
         # Counter for spend
         self.litellm_spend_metric = Counter(
             "litellm_spend_metric",
@@ -468,6 +481,22 @@ async def async_log_success_event(  # noqa: PLR0915
             total_time_seconds = total_time.total_seconds()
         api_call_start_time = kwargs.get("api_call_start_time", None)
 
+        completion_start_time = kwargs.get("completion_start_time", None)
+
+        if isinstance(completion_start_time, datetime) and isinstance(
+            api_call_start_time, datetime
+        ):
+            time_to_first_token_seconds = (
+                completion_start_time - api_call_start_time
+            ).total_seconds()
+            self.litellm_llm_api_time_to_first_token_metric.labels(
+                model,
+                user_api_key,
+                user_api_key_alias,
+                user_api_team,
+                user_api_team_alias,
+            ).observe(time_to_first_token_seconds)
+
         if api_call_start_time is not None and isinstance(
             api_call_start_time, datetime
         ):
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 283bddba7db7..105fabbdda62 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,8 @@
 model_list:
-  - model_name: jina-embedding
+  - model_name: gpt-3.5-turbo
     litellm_params:
-      model: jina_ai/jina-embeddings-v3
-      api_key: jina_658322978426431b9fe41bd6b29563c1wJQ1JDqf13S7BdxA_RkaNfvc-Gdj
\ No newline at end of file
+      model: gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+
+litellm_settings:
+  callbacks: ["prometheus"]
\ No newline at end of file
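
For reviewers who want to sanity-check the new histogram end to end, here is a minimal standalone sketch of the time-to-first-token observation this patch adds. It assumes only that `prometheus_client` is installed; the `LATENCY_BUCKETS` values, label values, and timestamps below are illustrative stand-ins, not litellm's real ones.

```python
# Standalone sketch of the TTFT observation added in this patch.
# LATENCY_BUCKETS here is a stand-in, not litellm's actual bucket list.
from datetime import datetime, timedelta

from prometheus_client import CollectorRegistry, Histogram, generate_latest

LATENCY_BUCKETS = (0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)

registry = CollectorRegistry()
ttft_metric = Histogram(
    "litellm_llm_api_time_to_first_token_metric",
    "Time to first token for a model's LLM API call",
    labelnames=["model", "hashed_api_key", "api_key_alias", "team", "team_alias"],
    buckets=LATENCY_BUCKETS,
    registry=registry,
)

# Simulate the timestamps litellm passes through kwargs: the API call
# starts, and the first streamed chunk lands 0.8s later.
api_call_start_time = datetime.now()
completion_start_time = api_call_start_time + timedelta(seconds=0.8)

# Same guard as the patch: only observe when both timestamps are datetimes.
if isinstance(completion_start_time, datetime) and isinstance(
    api_call_start_time, datetime
):
    ttft_seconds = (completion_start_time - api_call_start_time).total_seconds()
    ttft_metric.labels(
        "gpt-3.5-turbo", "hashed-key", "key-alias", "team-id", "team-alias"
    ).observe(ttft_seconds)

# Print the scrape-format output Prometheus would collect from /metrics.
print(generate_latest(registry).decode())
```

Once the proxy exports this histogram, the generated `_bucket`/`_sum`/`_count` series support standard Prometheus queries, e.g. `histogram_quantile(0.95, sum(rate(litellm_llm_api_time_to_first_token_metric_bucket[5m])) by (le, model))` for p95 TTFT per model.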