LiteLLM Minor Fixes & Improvements (10/24/2024) (#6421)
* fix(utils.py): support passing dynamic api base to validate_environment

Returns True when only an api base is required and an api base is passed

* fix(litellm_pre_call_utils.py): feature flag sending client headers to llm api

Fixes #6410

* fix(anthropic/chat/transformation.py): return correct error message

* fix(http_handler.py): add error response text in places where we expect it

* fix(factory.py): handle the base case where no non-system messages are passed to bedrock

Fixes #6411

* feat(cohere/embed): Support cohere image embeddings

Closes #6413

* fix(__init__.py): fix linting error

* docs(supported_embedding.md): add image embedding example to docs

* feat(cohere/embed): use cohere embedding returned usage for cost calc

* build(model_prices_and_context_window.json): add embed-english-v3.0 details (image cost + 'supports_image_input' flag)

* fix(cohere_transformation.py): fix linting error

* test(test_proxy_server.py): cleanup test

* test: cleanup test

* fix: fix linting errors
krrishdholakia authored Oct 25, 2024
1 parent 38708a3 commit c03e5da
Showing 23 changed files with 417 additions and 150 deletions.
54 changes: 54 additions & 0 deletions docs/my-website/docs/embedding/supported_embedding.md
@@ -84,6 +84,60 @@ print(query_result[:5])
</TabItem>
</Tabs>


## Image Embeddings

For models that support image embeddings, you can pass in a base64-encoded image string to the `input` param.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import embedding
import os

# set your api key
os.environ["COHERE_API_KEY"] = ""

response = embedding(model="cohere/embed-english-v3.0", input=["<base64 encoded image>"])
```
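
The example above elides the encoding step. A minimal sketch of producing that string from a local file (standard library only; the `image.jpg` path is hypothetical, and if your provider expects a data URI rather than a bare base64 string, prepend e.g. `data:image/jpeg;base64,`):

```python
import base64

from litellm import embedding

# read a local image and base64-encode it for the `input` param
with open("image.jpg", "rb") as f:
    base64_image = base64.b64encode(f.read()).decode("utf-8")

response = embedding(model="cohere/embed-english-v3.0", input=[base64_image])
```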

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Set up config.yaml

```yaml
model_list:
- model_name: cohere-embed
litellm_params:
model: cohere/embed-english-v3.0
api_key: os.environ/COHERE_API_KEY
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \
-H 'Authorization: Bearer sk-54d77cd67b9febbb' \
-H 'Content-Type: application/json' \
-d '{
"model": "cohere/embed-english-v3.0",
"input": ["<base64 encoded image>"]
}'
```
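
To send a real image instead of the placeholder, you can inline the encoding with command substitution (a sketch; `base64` flags vary by platform — GNU coreutils needs `-w 0` to disable line wrapping, macOS does not wrap by default):

```bash
curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \
-H 'Authorization: Bearer sk-54d77cd67b9febbb' \
-H 'Content-Type: application/json' \
-d "{
  \"model\": \"cohere-embed\",
  \"input\": [\"$(base64 -w 0 image.jpg)\"]
}"
```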
</TabItem>
</Tabs>
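
Since the proxy exposes an OpenAI-compatible endpoint, the OpenAI SDK works as well (a sketch reusing the base URL and key from the curl example above):

```python
from openai import OpenAI

client = OpenAI(api_key="sk-54d77cd67b9febbb", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    model="cohere-embed",  # the `model_name` from config.yaml
    input=["<base64 encoded image>"],
)
print(response.data[0].embedding[:5])
```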

## Input Params for `litellm.embedding()`


1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/configs.md
@@ -814,6 +814,7 @@ general_settings:
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
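
For example, a minimal `general_settings` block enabling the new flag (a sketch; merge into your existing config.yaml):

```yaml
general_settings:
  forward_client_headers_to_llm_api: true
```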

### router_settings - Reference

3 changes: 2 additions & 1 deletion litellm/__init__.py
@@ -8,6 +8,7 @@
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache
from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES
from litellm._logging import (
set_verbose,
_turn_on_debug,
@@ -136,7 +137,7 @@
### DEFAULT AZURE API VERSION ###
AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest
### COHERE EMBEDDINGS DEFAULT TYPE ###
-COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document"
+COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: COHERE_EMBEDDING_INPUT_TYPES = "search_document"
### GUARDRAILS ###
llamaguard_model_name: Optional[str] = None
openai_moderations_model_name: Optional[str] = None
8 changes: 8 additions & 0 deletions litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -333,6 +333,14 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
api_key: Optional[str],
dynamic_api_key: Optional[str],
) -> Tuple[str, str, Optional[str], Optional[str]]:
"""
Returns:
Tuple[str, str, Optional[str], Optional[str]]:
model: str
custom_llm_provider: str
dynamic_api_key: Optional[str]
api_base: Optional[str]
"""
custom_llm_provider = model.split("/", 1)[0]
model = model.split("/", 1)[1]
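
A plain-Python illustration of the split performed above (the model string is hypothetical, not litellm's API):

```python
model = "mistral/mistral-large-latest"  # hypothetical input

custom_llm_provider = model.split("/", 1)[0]  # -> "mistral"
model = model.split("/", 1)[1]  # -> "mistral-large-latest"
```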

2 changes: 2 additions & 0 deletions litellm/llms/anthropic/chat/handler.py
@@ -398,6 +398,8 @@ async def acompletion_function(
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
if error_response and hasattr(error_response, "text"):
error_text = getattr(error_response, "text", error_text)
raise AnthropicError(
message=error_text,
status_code=status_code,
2 changes: 1 addition & 1 deletion litellm/llms/azure_ai/embed/handler.py
@@ -9,7 +9,7 @@
from openai import OpenAI

import litellm
-from litellm.llms.cohere.embed import embedding as cohere_embedding
+from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
21 changes: 14 additions & 7 deletions litellm/llms/bedrock/embed/cohere_transformation.py
@@ -7,6 +7,7 @@
from typing import List

import litellm
from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig
from litellm.types.llms.bedrock import CohereEmbeddingRequest, CohereEmbeddingResponse
from litellm.types.utils import Embedding, EmbeddingResponse

@@ -26,15 +27,21 @@ def map_openai_params(
             optional_params["embedding_types"] = v
         return optional_params

+    def _is_v3_model(self, model: str) -> bool:
+        return "3" in model
+
     def _transform_request(
-        self, input: List[str], inference_params: dict
+        self, model: str, input: List[str], inference_params: dict
     ) -> CohereEmbeddingRequest:
-        transformed_request = CohereEmbeddingRequest(
-            texts=input,
-            input_type=litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE,  # type: ignore
+        transformed_request = CohereEmbeddingConfig()._transform_request(
+            model, input, inference_params
         )

-        for k, v in inference_params.items():
-            transformed_request[k] = v  # type: ignore
+        new_transformed_request = CohereEmbeddingRequest(
+            input_type=transformed_request["input_type"],
+        )
+        for k in CohereEmbeddingRequest.__annotations__.keys():
+            if k in transformed_request:
+                new_transformed_request[k] = transformed_request[k]  # type: ignore

-        return transformed_request
+        return new_transformed_request
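
The net effect: the Bedrock config now delegates to the shared `CohereEmbeddingConfig` and then filters the result down to the keys declared on Bedrock's `CohereEmbeddingRequest` TypedDict. A standalone sketch of that filtering pattern (hypothetical TypedDict, not litellm's):

```python
from typing import TypedDict


class EmbedRequest(TypedDict, total=False):
    input_type: str
    texts: list


raw = {"input_type": "search_document", "texts": ["hi"], "extra": 1}

# keep only the keys the TypedDict declares, dropping unknown ones
filtered = EmbedRequest(
    **{k: raw[k] for k in EmbedRequest.__annotations__.keys() if k in raw}
)
# -> {'input_type': 'search_document', 'texts': ['hi']}
```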
4 changes: 2 additions & 2 deletions litellm/llms/bedrock/embed/embedding.py
@@ -11,7 +11,7 @@
import httpx

import litellm
-from litellm.llms.cohere.embed import embedding as cohere_embedding
+from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
@@ -369,7 +369,7 @@ def embeddings(
batch_data: Optional[List] = None
if provider == "cohere":
data = BedrockCohereEmbeddingConfig()._transform_request(
-            input=input, inference_params=inference_params
+            model=model, input=input, inference_params=inference_params
)
elif provider == "amazon" and model in [
"amazon.titan-embed-image-v1",
litellm/llms/cohere/embed/handler.py
@@ -12,8 +12,11 @@
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.bedrock import CohereEmbeddingRequest
from litellm.utils import Choices, Message, ModelResponse, Usage

from .transformation import CohereEmbeddingConfig


def validate_environment(api_key, headers: dict):
headers.update(
@@ -41,39 +44,9 @@ def __init__(self, status_code, message):
) # Call the base class constructor with the parameters it needs


-def _process_embedding_response(
-    embeddings: list,
-    model_response: litellm.EmbeddingResponse,
-    model: str,
-    encoding: Any,
-    input: list,
-) -> litellm.EmbeddingResponse:
-    output_data = []
-    for idx, embedding in enumerate(embeddings):
-        output_data.append(
-            {"object": "embedding", "index": idx, "embedding": embedding}
-        )
-    model_response.object = "list"
-    model_response.data = output_data
-    model_response.model = model
-    input_tokens = 0
-    for text in input:
-        input_tokens += len(encoding.encode(text))
-
-    setattr(
-        model_response,
-        "usage",
-        Usage(
-            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
-        ),
-    )
-
-    return model_response
-
-
 async def async_embedding(
     model: str,
-    data: dict,
+    data: Union[dict, CohereEmbeddingRequest],
input: list,
model_response: litellm.utils.EmbeddingResponse,
timeout: Optional[Union[float, httpx.Timeout]],
@@ -121,19 +94,12 @@ async def async_embedding(
)
raise e

-    ## LOGGING
-    logging_obj.post_call(
-        input=input,
-        api_key=api_key,
-        additional_args={"complete_input_dict": data},
-        original_response=response.text,
-    )
-
-    embeddings = response.json()["embeddings"]
-
-    ## PROCESS RESPONSE ##
-    return _process_embedding_response(
-        embeddings=embeddings,
+    return CohereEmbeddingConfig()._transform_response(
+        response=response,
+        api_key=api_key,
+        logging_obj=logging_obj,
+        data=data,
model_response=model_response,
model=model,
encoding=encoding,
@@ -149,7 +115,7 @@ def embedding(
optional_params: dict,
headers: dict,
encoding: Any,
-    data: Optional[dict] = None,
+    data: Optional[Union[dict, CohereEmbeddingRequest]] = None,
complete_api_base: Optional[str] = None,
api_key: Optional[str] = None,
aembedding: Optional[bool] = None,
@@ -159,11 +125,10 @@
headers = validate_environment(api_key, headers=headers)
embed_url = complete_api_base or "https://api.cohere.ai/v1/embed"
model = model
-    data = data or {"model": model, "texts": input, **optional_params}
-
-    if "3" in model and "input_type" not in data:
-        # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
-        data["input_type"] = "search_document"
+    data = data or CohereEmbeddingConfig()._transform_request(
+        model=model, input=input, inference_params=optional_params
+    )

## ROUTING
if aembedding is True:
@@ -193,30 +158,12 @@
client = HTTPHandler(concurrent_limit=1)

response = client.post(embed_url, headers=headers, data=json.dumps(data))
-    ## LOGGING
-    logging_obj.post_call(
-        input=input,
-        api_key=api_key,
-        additional_args={"complete_input_dict": data},
-        original_response=response,
-    )
-    """
-    response
-    {
-        'object': "list",
-        'data': [
-        ]
-        'model',
-        'usage'
-    }
-    """
-    if response.status_code != 200:
-        raise CohereError(message=response.text, status_code=response.status_code)
-    embeddings = response.json()["embeddings"]
-
-    return _process_embedding_response(
-        embeddings=embeddings,
+    return CohereEmbeddingConfig()._transform_response(
+        response=response,
+        api_key=api_key,
+        logging_obj=logging_obj,
+        data=data,
model_response=model_response,
model=model,
encoding=encoding,
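
For context, a sketch of the user-facing behavior this refactor preserves, assuming `input_type` is forwarded to cohere as a provider-specific optional param: v3 embedding models require an `input_type`, and litellm defaults it to `litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE` (`"search_document"`) when omitted.

```python
import os

import litellm

os.environ["COHERE_API_KEY"] = ""  # set your key

response = litellm.embedding(
    model="cohere/embed-english-v3.0",
    input=["hello world"],
    input_type="search_query",  # override the "search_document" default
)
print(response.usage)  # cost calc now uses the usage returned by cohere
```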