LiteLLM Minor Fixes & Improvements (10/24/2024) (#6421)
* fix(utils.py): support passing dynamic api base to validate_environment

Returns True when only an api base is required and an api base is passed

* fix(litellm_pre_call_utils.py): feature flag sending client headers to llm api

Fixes #6410

* fix(anthropic/chat/transformation.py): return correct error message

* fix(http_handler.py): add error response text in places where we expect it

* fix(factory.py): handle the base case where no non-system messages are passed to bedrock

Fixes #6411

* feat(cohere/embed): Support cohere image embeddings

Closes #6413

* fix(__init__.py): fix linting error

* docs(supported_embedding.md): add image embedding example to docs

* feat(cohere/embed): use cohere embedding returned usage for cost calc

* build(model_prices_and_context_window.json): add embed-english-v3.0 details (image cost + 'supports_image_input' flag)

* fix(cohere_transformation.py): fix linting error

* test(test_proxy_server.py): cleanup test

* test: cleanup test

* fix: fix linting errors
krrishdholakia authored Oct 25, 2024
1 parent 38708a3 commit c03e5da
Showing 23 changed files with 417 additions and 150 deletions.
54 changes: 54 additions & 0 deletions docs/my-website/docs/embedding/supported_embedding.md
@@ -84,6 +84,60 @@ print(query_result[:5])
</TabItem>
</Tabs>


## Image Embeddings

For models that support image embeddings, you can pass in a base64-encoded image string to the `input` param.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import embedding
import os

# set your api key
os.environ["COHERE_API_KEY"] = ""

response = embedding(model="cohere/embed-english-v3.0", input=["<base64 encoded image>"])
```
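
The example above elides the encoding step. A minimal sketch of producing that string from a local file (standard library only; the `image.jpg` path is hypothetical, and if your provider expects a data URI rather than a bare base64 string, prepend e.g. `data:image/jpeg;base64,`):

```python
import base64

from litellm import embedding

# read a local image and base64-encode it for the `input` param
with open("image.jpg", "rb") as f:
    base64_image = base64.b64encode(f.read()).decode("utf-8")

response = embedding(model="cohere/embed-english-v3.0", input=[base64_image])
```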

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Set up config.yaml

```yaml
model_list:
- model_name: cohere-embed
litellm_params:
model: cohere/embed-english-v3.0
api_key: os.environ/COHERE_API_KEY
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \
-H 'Authorization: Bearer sk-54d77cd67b9febbb' \
-H 'Content-Type: application/json' \
-d '{
"model": "cohere/embed-english-v3.0",
"input": ["<base64 encoded image>"]
}'
```
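
To send a real image instead of the placeholder, you can inline the encoding with command substitution (a sketch; `base64` flags vary by platform — GNU coreutils needs `-w 0` to disable line wrapping, macOS does not wrap by default):

```bash
curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \
-H 'Authorization: Bearer sk-54d77cd67b9febbb' \
-H 'Content-Type: application/json' \
-d "{
  \"model\": \"cohere-embed\",
  \"input\": [\"$(base64 -w 0 image.jpg)\"]
}"
```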
</TabItem>
</Tabs>
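
Since the proxy exposes an OpenAI-compatible endpoint, the OpenAI SDK works as well (a sketch reusing the base URL and key from the curl example above):

```python
from openai import OpenAI

client = OpenAI(api_key="sk-54d77cd67b9febbb", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    model="cohere-embed",  # the `model_name` from config.yaml
    input=["<base64 encoded image>"],
)
print(response.data[0].embedding[:5])
```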

## Input Params for `litellm.embedding()`


1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/configs.md
@@ -814,6 +814,7 @@ general_settings:
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
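
For example, a minimal `general_settings` block enabling the new flag (a sketch; merge into your existing config.yaml):

```yaml
general_settings:
  forward_client_headers_to_llm_api: true
```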

### router_settings - Reference

3 changes: 2 additions & 1 deletion litellm/__init__.py
@@ -8,6 +8,7 @@
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache
from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES
from litellm._logging import (
set_verbose,
_turn_on_debug,
@@ -136,7 +137,7 @@
### DEFAULT AZURE API VERSION ###
AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest
### COHERE EMBEDDINGS DEFAULT TYPE ###
-COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document"
+COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: COHERE_EMBEDDING_INPUT_TYPES = "search_document"
### GUARDRAILS ###
llamaguard_model_name: Optional[str] = None
openai_moderations_model_name: Optional[str] = None
8 changes: 8 additions & 0 deletions litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -333,6 +333,14 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
api_key: Optional[str],
dynamic_api_key: Optional[str],
) -> Tuple[str, str, Optional[str], Optional[str]]:
"""
Returns:
Tuple[str, str, Optional[str], Optional[str]]:
model: str
custom_llm_provider: str
dynamic_api_key: Optional[str]
api_base: Optional[str]
"""
custom_llm_provider = model.split("/", 1)[0]
model = model.split("/", 1)[1]
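
A plain-Python illustration of the split performed above (the model string is hypothetical, not litellm's API):

```python
model = "mistral/mistral-large-latest"  # hypothetical input

custom_llm_provider = model.split("/", 1)[0]  # -> "mistral"
model = model.split("/", 1)[1]  # -> "mistral-large-latest"
```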

2 changes: 2 additions & 0 deletions litellm/llms/anthropic/chat/handler.py
@@ -398,6 +398,8 @@ async def acompletion_function(
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
if error_response and hasattr(error_response, "text"):
error_text = getattr(error_response, "text", error_text)
raise AnthropicError(
message=error_text,
status_code=status_code,
2 changes: 1 addition & 1 deletion litellm/llms/azure_ai/embed/handler.py
@@ -9,7 +9,7 @@
from openai import OpenAI

import litellm
-from litellm.llms.cohere.embed import embedding as cohere_embedding
+from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
21 changes: 14 additions & 7 deletions litellm/llms/bedrock/embed/cohere_transformation.py
@@ -7,6 +7,7 @@
from typing import List

import litellm
from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig
from litellm.types.llms.bedrock import CohereEmbeddingRequest, CohereEmbeddingResponse
from litellm.types.utils import Embedding, EmbeddingResponse

@@ -26,15 +27,21 @@ def map_openai_params(
             optional_params["embedding_types"] = v
         return optional_params

+    def _is_v3_model(self, model: str) -> bool:
+        return "3" in model
+
     def _transform_request(
-        self, input: List[str], inference_params: dict
+        self, model: str, input: List[str], inference_params: dict
     ) -> CohereEmbeddingRequest:
-        transformed_request = CohereEmbeddingRequest(
-            texts=input,
-            input_type=litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE,  # type: ignore
+        transformed_request = CohereEmbeddingConfig()._transform_request(
+            model, input, inference_params
         )

-        for k, v in inference_params.items():
-            transformed_request[k] = v  # type: ignore
+        new_transformed_request = CohereEmbeddingRequest(
+            input_type=transformed_request["input_type"],
+        )
+        for k in CohereEmbeddingRequest.__annotations__.keys():
+            if k in transformed_request:
+                new_transformed_request[k] = transformed_request[k]  # type: ignore

-        return transformed_request
+        return new_transformed_request
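
The net effect: the Bedrock config now delegates to the shared `CohereEmbeddingConfig` and then filters the result down to the keys declared on Bedrock's `CohereEmbeddingRequest` TypedDict. A standalone sketch of that filtering pattern (hypothetical TypedDict, not litellm's):

```python
from typing import TypedDict


class EmbedRequest(TypedDict, total=False):
    input_type: str
    texts: list


raw = {"input_type": "search_document", "texts": ["hi"], "extra": 1}

# keep only the keys the TypedDict declares, dropping unknown ones
filtered = EmbedRequest(
    **{k: raw[k] for k in EmbedRequest.__annotations__.keys() if k in raw}
)
# -> {'input_type': 'search_document', 'texts': ['hi']}
```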
4 changes: 2 additions & 2 deletions litellm/llms/bedrock/embed/embedding.py
@@ -11,7 +11,7 @@
import httpx

import litellm
-from litellm.llms.cohere.embed import embedding as cohere_embedding
+from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
@@ -369,7 +369,7 @@ def embeddings(
batch_data: Optional[List] = None
if provider == "cohere":
data = BedrockCohereEmbeddingConfig()._transform_request(
-            input=input, inference_params=inference_params
+            model=model, input=input, inference_params=inference_params
)
elif provider == "amazon" and model in [
"amazon.titan-embed-image-v1",
litellm/llms/cohere/embed/handler.py
@@ -12,8 +12,11 @@
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.bedrock import CohereEmbeddingRequest
from litellm.utils import Choices, Message, ModelResponse, Usage

from .transformation import CohereEmbeddingConfig


def validate_environment(api_key, headers: dict):
headers.update(
@@ -41,39 +44,9 @@ def __init__(self, status_code, message):
) # Call the base class constructor with the parameters it needs


-def _process_embedding_response(
-    embeddings: list,
-    model_response: litellm.EmbeddingResponse,
-    model: str,
-    encoding: Any,
-    input: list,
-) -> litellm.EmbeddingResponse:
-    output_data = []
-    for idx, embedding in enumerate(embeddings):
-        output_data.append(
-            {"object": "embedding", "index": idx, "embedding": embedding}
-        )
-    model_response.object = "list"
-    model_response.data = output_data
-    model_response.model = model
-    input_tokens = 0
-    for text in input:
-        input_tokens += len(encoding.encode(text))
-
-    setattr(
-        model_response,
-        "usage",
-        Usage(
-            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
-        ),
-    )
-
-    return model_response
-
-
 async def async_embedding(
     model: str,
-    data: dict,
+    data: Union[dict, CohereEmbeddingRequest],
input: list,
model_response: litellm.utils.EmbeddingResponse,
timeout: Optional[Union[float, httpx.Timeout]],
@@ -121,19 +94,12 @@ async def async_embedding(
)
raise e

-    ## LOGGING
-    logging_obj.post_call(
-        input=input,
-        api_key=api_key,
-        additional_args={"complete_input_dict": data},
-        original_response=response.text,
-    )
-
-    embeddings = response.json()["embeddings"]
-
-    ## PROCESS RESPONSE ##
-    return _process_embedding_response(
-        embeddings=embeddings,
+    return CohereEmbeddingConfig()._transform_response(
+        response=response,
+        api_key=api_key,
+        logging_obj=logging_obj,
+        data=data,
model_response=model_response,
model=model,
encoding=encoding,
@@ -149,7 +115,7 @@ def embedding(
optional_params: dict,
headers: dict,
encoding: Any,
-    data: Optional[dict] = None,
+    data: Optional[Union[dict, CohereEmbeddingRequest]] = None,
complete_api_base: Optional[str] = None,
api_key: Optional[str] = None,
aembedding: Optional[bool] = None,
@@ -159,11 +125,10 @@
headers = validate_environment(api_key, headers=headers)
embed_url = complete_api_base or "https://api.cohere.ai/v1/embed"
model = model
-    data = data or {"model": model, "texts": input, **optional_params}
-
-    if "3" in model and "input_type" not in data:
-        # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
-        data["input_type"] = "search_document"
+    data = data or CohereEmbeddingConfig()._transform_request(
+        model=model, input=input, inference_params=optional_params
+    )

## ROUTING
if aembedding is True:
@@ -193,30 +158,12 @@
client = HTTPHandler(concurrent_limit=1)

response = client.post(embed_url, headers=headers, data=json.dumps(data))
-    ## LOGGING
-    logging_obj.post_call(
-        input=input,
-        api_key=api_key,
-        additional_args={"complete_input_dict": data},
-        original_response=response,
-    )
-    """
-    response
-    {
-        'object': "list",
-        'data': [
-        ]
-        'model',
-        'usage'
-    }
-    """
-    if response.status_code != 200:
-        raise CohereError(message=response.text, status_code=response.status_code)
-    embeddings = response.json()["embeddings"]
-
-    return _process_embedding_response(
-        embeddings=embeddings,
+    return CohereEmbeddingConfig()._transform_response(
+        response=response,
+        api_key=api_key,
+        logging_obj=logging_obj,
+        data=data,
model_response=model_response,
model=model,
encoding=encoding,
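
For context, a sketch of the user-facing behavior this refactor preserves, assuming `input_type` is forwarded to cohere as a provider-specific optional param: v3 embedding models require an `input_type`, and litellm defaults it to `litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE` (`"search_document"`) when omitted.

```python
import os

import litellm

os.environ["COHERE_API_KEY"] = ""  # set your key

response = litellm.embedding(
    model="cohere/embed-english-v3.0",
    input=["hello world"],
    input_type="search_query",  # override the "search_document" default
)
print(response.usage)  # cost calc now uses the usage returned by cohere
```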