feat(proxy_cli.py): add new 'log_config' cli param (#6352)
* feat(proxy_cli.py): add new 'log_config' cli param

Allows passing logging.conf to uvicorn on startup

* docs(cli.md): add logging conf to uvicorn cli docs

* fix(get_llm_provider_logic.py): fix default api base for litellm_proxy

Fixes #6332

* feat(openai_like/embedding): Add support for jina ai embeddings

Closes #6337

* docs(deploy.md): update entrypoint.sh filepath post-refactor

Fixes outdated docs

* feat(prometheus.py): emit time_to_first_token metric on prometheus

Closes #6334

* fix(prometheus.py): only emit time to first token metric if stream is True

Enables more accurate TTFT measurement

* test: handle vertex api instability

* fix(get_llm_provider_logic.py): fix import

* fix(openai.py): fix deepinfra default api base

* fix(anthropic/transformation.py): remove anthropic beta header (#6361)
krrishdholakia committed Oct 22, 2024
1 parent 95a1069 commit dbbd0f2
Showing 22 changed files with 839 additions and 260 deletions.
8 changes: 8 additions & 0 deletions docs/my-website/docs/proxy/cli.md
@@ -176,3 +176,11 @@ Cli arguments, --host, --port, --num_workers
```
+## --log_config
+- **Default:** `None`
+- **Type:** `str`
+- Specify a log configuration file for uvicorn.
+- **Usage:**
+```shell
+litellm --log_config path/to/log_config.conf
+```
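For context, here is a minimal, hypothetical sketch of how a `--log_config` path can be handed to uvicorn. It is not the actual `proxy_cli.py` implementation; the ASGI app import string is assumed for illustration, but `uvicorn.run()` does accept a `log_config` argument (a path to an INI/JSON/YAML logging config, or a dict).

```python
# Hypothetical wiring, for illustration only; the real proxy_cli.py may differ.
from typing import Optional

import uvicorn


def run_proxy(host: str = "0.0.0.0", port: int = 4000, log_config: Optional[str] = None) -> None:
    uvicorn.run(
        "litellm.proxy.proxy_server:app",  # assumed ASGI app path, for illustration
        host=host,
        port=port,
        log_config=log_config,  # None keeps uvicorn's default logging setup
    )


if __name__ == "__main__":
    run_proxy(log_config="path/to/log_config.conf")
```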
4 changes: 2 additions & 2 deletions docs/my-website/docs/proxy/deploy.md
@@ -125,7 +125,7 @@ WORKDIR /app
COPY config.yaml .
# Make sure your docker/entrypoint.sh is executable
-RUN chmod +x entrypoint.sh
+RUN chmod +x ./docker/entrypoint.sh
# Expose the necessary port
EXPOSE 4000/tcp
@@ -632,7 +632,7 @@ RUN rm -rf /app/litellm/proxy/_experimental/out/* && \
WORKDIR /app
# Make sure your entrypoint.sh is executable
-RUN chmod +x entrypoint.sh
+RUN chmod +x ./docker/entrypoint.sh
# Expose the necessary port
EXPOSE 4000/tcp
5 changes: 3 additions & 2 deletions docs/my-website/docs/proxy/prometheus.md
@@ -134,8 +134,9 @@ Use this for LLM API Error monitoring and tracking remaining rate limits and tok

| Metric Name | Description |
|----------------------|--------------------------------------|
-| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels `litellm_call_id`, `model`, `user_api_key`, `user_api_key_alias`, `user_api_team`, `user_api_team_alias` |
-| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model`, `user_api_key`, `user_api_key_alias`, `user_api_team`, `user_api_team_alias` |
+| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` |
+| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` |
+| `litellm_llm_api_time_to_first_token_metric` | Time to first token for LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` [Note: only emitted for streaming requests] |
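For intuition about what the new time-to-first-token metric measures, here is a hedged client-side sketch: TTFT is the delay between sending a streaming request and receiving the first chunk. The model name is illustrative, and this is not the proxy's internal instrumentation (that lives in `litellm/integrations/prometheus.py`, shown further below).

```python
# Client-side illustration of "time to first token" for a streaming call.
# Assumes credentials for the chosen model are configured (e.g. via env vars).
import time

import litellm

start = time.perf_counter()
stream = litellm.completion(
    model="gpt-3.5-turbo",  # illustrative model
    messages=[{"role": "user", "content": "hi"}],
    stream=True,  # the proxy only emits the TTFT metric for streaming requests
)
for _first_chunk in stream:
    ttft_seconds = time.perf_counter() - start
    print(f"time to first token: {ttft_seconds:.3f}s")
    break  # only the first chunk matters for TTFT
```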

## Virtual Key - Budget, Rate Limit Metrics

5 changes: 5 additions & 0 deletions litellm/__init__.py
@@ -98,6 +98,7 @@
openai_key: Optional[str] = None
groq_key: Optional[str] = None
databricks_key: Optional[str] = None
+openai_like_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
@@ -710,6 +711,8 @@ def add_known_models():

class LlmProviders(str, Enum):
    OPENAI = "openai"
+    OPENAI_LIKE = "openai_like"  # embedding only
+    JINA_AI = "jina_ai"
    CUSTOM_OPENAI = "custom_openai"
    TEXT_COMPLETION_OPENAI = "text-completion-openai"
    COHERE = "cohere"
@@ -1013,6 +1016,7 @@ class LlmProviders(str, Enum):
from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
    FireworksAIEmbeddingConfig,
)
+from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
from .llms.volcengine import VolcEngineConfig
from .llms.text_completion_codestral import MistralTextCompletionConfig
from .llms.AzureOpenAI.azure import (
@@ -1022,6 +1026,7 @@ class LlmProviders(str, Enum):

from .llms.AzureOpenAI.chat.gpt_transformation import AzureOpenAIConfig
from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
+from .llms.perplexity.chat.transformation import PerplexityChatConfig
from .llms.AzureOpenAI.chat.o1_transformation import AzureOpenAIO1Config
from .llms.watsonx import IBMWatsonXAIConfig
from .main import * # type: ignore
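As a usage note for the new `jina_ai` provider (embedding support, per the commit above), here is a hedged sketch of calling Jina AI embeddings through litellm. The model name is illustrative, the `provider/model` prefix follows litellm's usual convention, and credentials are assumed to be configured (e.g. an API key env var, or `api_key=` passed explicitly).

```python
# Hedged usage sketch for the new jina_ai embedding provider.
import litellm

response = litellm.embedding(
    model="jina_ai/jina-embeddings-v3",  # illustrative model; "jina_ai/" prefix assumed
    input=["hello world", "good morning"],
)
print(len(response.data), "embedding vectors returned")  # OpenAI-compatible response shape
```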
43 changes: 41 additions & 2 deletions litellm/integrations/prometheus.py
@@ -97,6 +97,19 @@ def __init__(
            buckets=LATENCY_BUCKETS,
        )

+        self.litellm_llm_api_time_to_first_token_metric = Histogram(
+            "litellm_llm_api_time_to_first_token_metric",
+            "Time to first token for a models LLM API call",
+            labelnames=[
+                "model",
+                "hashed_api_key",
+                "api_key_alias",
+                "team",
+                "team_alias",
+            ],
+            buckets=LATENCY_BUCKETS,
+        )
+
        # Counter for spend
        self.litellm_spend_metric = Counter(
            "litellm_spend_metric",
@@ -335,14 +348,17 @@ async def async_log_success_event( # noqa: PLR0915
        )

        # unpack kwargs
-        standard_logging_payload: StandardLoggingPayload = kwargs.get(
-            "standard_logging_object", {}
+        standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
+            "standard_logging_object"
        )
+        if standard_logging_payload is None:
+            raise ValueError("standard_logging_object is required")
        model = kwargs.get("model", "")
        litellm_params = kwargs.get("litellm_params", {}) or {}
        _metadata = litellm_params.get("metadata", {})
        proxy_server_request = litellm_params.get("proxy_server_request") or {}
        end_user_id = proxy_server_request.get("body", {}).get("user", None)
+        model_parameters: dict = standard_logging_payload["model_parameters"]
        user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
        user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
        user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
@@ -468,6 +484,28 @@ async def async_log_success_event( # noqa: PLR0915
        total_time_seconds = total_time.total_seconds()
        api_call_start_time = kwargs.get("api_call_start_time", None)

+        completion_start_time = kwargs.get("completion_start_time", None)
+
+        if (
+            completion_start_time is not None
+            and isinstance(completion_start_time, datetime)
+            and model_parameters.get("stream")
+            is True  # only emit for streaming requests
+        ):
+            time_to_first_token_seconds = (
+                completion_start_time - api_call_start_time
+            ).total_seconds()
+            self.litellm_llm_api_time_to_first_token_metric.labels(
+                model,
+                user_api_key,
+                user_api_key_alias,
+                user_api_team,
+                user_api_team_alias,
+            ).observe(time_to_first_token_seconds)
+        else:
+            verbose_logger.debug(
+                "Time to first token metric not emitted, stream option in model_parameters is not True"
+            )
        if api_call_start_time is not None and isinstance(
            api_call_start_time, datetime
        ):
@@ -512,6 +550,7 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
            "standard_logging_object", {}
        )
        proxy_server_request = litellm_params.get("proxy_server_request") or {}
+
        end_user_id = proxy_server_request.get("body", {}).get("user", None)
        user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
        user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]