From c8da57710f88282f73bc910ef4169d3d31200d6c Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:34:16 -0800
Subject: [PATCH 1/7] (chore) bump poetry lock

---
 poetry.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 7e58f02bc862..44e21dd7674f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1158,13 +1158,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.8.0"
+version = "1.10.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.8.0-py3-none-any.whl", hash = "sha256:0f8f53805826103fdd8adaf379ad3ec23f9d867e698cbc14caf34b778d150175"},
-    {file = "openai-1.8.0.tar.gz", hash = "sha256:93366be27802f517e89328801913d2a5ede45e3b86fdcab420385b8a1b88c767"},
+    {file = "openai-1.10.0-py3-none-any.whl", hash = "sha256:aa69e97d0223ace9835fbf9c997abe9ee95318f684fd2de6d02c870700c71ebc"},
+    {file = "openai-1.10.0.tar.gz", hash = "sha256:208886cb501b930dc63f48d51db9c15e5380380f80516d07332adad67c9f1053"},
 ]
 
 [package.dependencies]

From 17370dc50fab5586b25c7a26f09545cadf39d6ff Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:37:01 -0800
Subject: [PATCH 2/7] (test) dimension param - openai

---
 litellm/tests/test_embedding.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 42ac6f7f9d3c..a005a6ad1688 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -64,7 +64,9 @@ def test_openai_embedding_3():
             model="text-embedding-3-small",
             input=["good morning from litellm", "this is another item"],
             metadata={"anything": "good day"},
+            dimensions=5,
         )
+        print(f"response: {response}")
         litellm_response = dict(response)
         litellm_response_keys = set(litellm_response.keys())
         litellm_response_keys.discard("_response_ms")
@@ -80,6 +82,7 @@ def test_openai_embedding_3():
         response = client.embeddings.create(
             model="text-embedding-3-small",
             input=["good morning from litellm", "this is another item"],
+            dimensions=5,
         )
 
         response = dict(response)

From 479add6b96b959955c9c0c376d3f106724b31629 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:54:34 -0800
Subject: [PATCH 3/7] (feat) add support for dimensions param

---
 litellm/main.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/litellm/main.py b/litellm/main.py
index f9f1139f69b6..929b80ee0ad5 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2224,6 +2224,7 @@ def embedding(
     model,
     input=[],
     # Optional params
+    dimensions: Optional[int] = None,
     timeout=600,  # default to 10 minutes
     # set api_base, api_version, api_key
     api_base: Optional[str] = None,
@@ -2244,6 +2245,7 @@ def embedding(
     Parameters:
     - model: The embedding model to use.
     - input: The input for which embeddings are to be generated.
+    - dimensions: The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
    - timeout: The timeout value for the API call, default 10 mins
    - litellm_call_id: The call ID for litellm logging.
    - litellm_logging_obj: The litellm logging object.
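A minimal sketch of how the new `dimensions` kwarg travels through `litellm.embedding` — it mirrors `test_openai_embedding_3` from PATCH 2 and is not part of the patch itself; it assumes this series is applied and `OPENAI_API_KEY` is set:

```python
import litellm

# Per the docstring added above, `dimensions` is only honored by
# text-embedding-3 and later models.
response = litellm.embedding(
    model="text-embedding-3-small",
    input=["good morning from litellm"],
    dimensions=5,
)
print(len(response.data[0]["embedding"]))  # expected: 5
```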
@@ -2277,6 +2279,7 @@ def embedding(
     output_cost_per_second = kwargs.get("output_cost_per_second", None)
     openai_params = [
         "user",
+        "dimensions",
         "request_timeout",
         "api_base",
         "api_version",
@@ -2345,7 +2348,9 @@ def embedding(
         api_key=api_key,
     )
     optional_params = get_optional_params_embeddings(
+        model=model,
         user=user,
+        dimensions=dimensions,
         encoding_format=encoding_format,
         custom_llm_provider=custom_llm_provider,
         **non_default_params,

From 0fc8876ea2678195045c6e0bd622e775c28c18f4 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:55:38 -0800
Subject: [PATCH 4/7] (feat) support dimensions param

---
 litellm/utils.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index b0e48bbc6e23..d1611b075de0 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3313,8 +3313,10 @@ def get_optional_params_image_gen(
 
 
 def get_optional_params_embeddings(  # 2 optional params
+    model=None,
     user=None,
     encoding_format=None,
+    dimensions=None,
     custom_llm_provider="",
     **kwargs,
 ):
@@ -3325,7 +3327,7 @@ def get_optional_params_embeddings(
     for k, v in special_params.items():
         passed_params[k] = v
 
-    default_params = {"user": None, "encoding_format": None}
+    default_params = {"user": None, "encoding_format": None, "dimensions": None}
 
     non_default_params = {
         k: v
@@ -3333,6 +3335,19 @@ def get_optional_params_embeddings(
         if (k in default_params and v != default_params[k])
     }
     ## raise exception if non-default value passed for non-openai/azure embedding calls
+    if custom_llm_provider == "openai":
+        # `dimensions` is only supported in `text-embedding-3` and later models
+
+        if (
+            model is not None
+            and "text-embedding-3" not in model
+            and "dimensions" in non_default_params.keys()
+        ):
+            raise UnsupportedParamsError(
+                status_code=500,
+                message=f"Setting dimensions is only supported for OpenAI `text-embedding-3` and later models. To drop it from the call, set `litellm.drop_params = True`.",
+            )
+
     if (
         custom_llm_provider != "openai"
        and custom_llm_provider != "azure"

From 2a1104d1cfef826e18637234e7c2f983d4b06826 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 13:18:27 -0800
Subject: [PATCH 5/7] (fix) SpendLogs Table

---
 litellm/proxy/_types.py         |  6 +++---
 litellm/proxy/proxy_config.yaml |  8 +++++++-
 litellm/proxy/schema.prisma     |  6 +++---
 schema.prisma                   |  6 +++---
 tests/test_keys.py              | 12 +++++++++---
 5 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index a3d1b4815d1d..9a5acc440641 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -346,9 +346,9 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     model: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
-    total_tokens: Optional[float] = 0.0
-    prompt_tokens: Optional[float] = 0.0
-    completion_tokens: Optional[float] = 0.0
+    total_tokens: Optional[int] = 0
+    prompt_tokens: Optional[int] = 0
+    completion_tokens: Optional[int] = 0
     startTime: Union[str, datetime, None]
     endTime: Union[str, datetime, None]
     user: Optional[str] = ""

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 7cb2714f4207..aa950c035033 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,6 +11,12 @@ model_list:
       output_cost_per_token: 0.00003
       max_tokens: 4096
       base_model: gpt-3.5-turbo
+  - model_name: gpt-4
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-vision
     litellm_params:
       model: azure/gpt-4-vision
@@ -61,7 +67,7 @@ model_list:
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   success_callback: ['langfuse']
-  max_budget: 0.025 # global budget for proxy
+  max_budget: 10 # global budget for proxy
   budget_duration: 30d # global budget duration, will reset after 30d
   # cache: True
 # setting callback class

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2d8b0e6621d3..2eb6332092ac 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -50,9 +50,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Float    @default(0.0)
-  prompt_tokens     Float    @default(0.0)
-  completion_tokens Float    @default(0.0)
+  total_tokens      Int      @default(0)
+  prompt_tokens     Int      @default(0)
+  completion_tokens Int      @default(0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/schema.prisma b/schema.prisma
index 103186aaed69..0882c650c81b 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -53,9 +53,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Float    @default(0.0)
-  prompt_tokens     Float    @default(0.0)
-  completion_tokens Float    @default(0.0)
+  total_tokens      Int      @default(0)
+  prompt_tokens     Int      @default(0)
+  completion_tokens Int      @default(0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/tests/test_keys.py b/tests/test_keys.py
index 348be63af3f0..a296ef13eb36 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -281,14 +281,20 @@ async def test_key_info_spend_values():
         await asyncio.sleep(5)
         spend_logs = await get_spend_logs(session=session, request_id=response["id"])
         print(f"spend_logs: {spend_logs}")
-        usage = spend_logs[0]["usage"]
+        completion_tokens = spend_logs[0]["completion_tokens"]
+        prompt_tokens = spend_logs[0]["prompt_tokens"]
+        print(f"prompt_tokens: {prompt_tokens}; completion_tokens: {completion_tokens}")
+
+        litellm.set_verbose = True
         prompt_cost, completion_cost = litellm.cost_per_token(
             model="gpt-35-turbo",
-            prompt_tokens=usage["prompt_tokens"],
-            completion_tokens=usage["completion_tokens"],
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
             custom_llm_provider="azure",
         )
+        print("prompt_cost: ", prompt_cost, "completion_cost: ", completion_cost)
         response_cost = prompt_cost + completion_cost
+        print(f"response_cost: {response_cost}")
         await asyncio.sleep(5)  # allow db log to be updated
         key_info = await get_key_info(session=session, get_key=key, call_key=key)
         print(

From 273e6d190565c01f0c121918260d9518d1e60e5e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 13:26:49 -0800
Subject: [PATCH 6/7] Revert "(fix) SpendLogs Table"

This reverts commit 2a1104d1cfef826e18637234e7c2f983d4b06826.

---
 litellm/proxy/_types.py         |  6 +++---
 litellm/proxy/proxy_config.yaml |  8 +-------
 litellm/proxy/schema.prisma     |  6 +++---
 schema.prisma                   |  6 +++---
 tests/test_keys.py              | 12 +++---------
 5 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 9a5acc440641..a3d1b4815d1d 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -346,9 +346,9 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     model: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
-    total_tokens: Optional[int] = 0
-    prompt_tokens: Optional[int] = 0
-    completion_tokens: Optional[int] = 0
+    total_tokens: Optional[float] = 0.0
+    prompt_tokens: Optional[float] = 0.0
+    completion_tokens: Optional[float] = 0.0
     startTime: Union[str, datetime, None]
     endTime: Union[str, datetime, None]
     user: Optional[str] = ""

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index aa950c035033..7cb2714f4207 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,12 +11,6 @@ model_list:
       output_cost_per_token: 0.00003
       max_tokens: 4096
       base_model: gpt-3.5-turbo
-  - model_name: gpt-4
-    litellm_params:
-      model: azure/chatgpt-v-2
-      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
-      api_version: "2023-05-15"
-      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-vision
     litellm_params:
       model: azure/gpt-4-vision
@@ -67,7 +61,7 @@ model_list:
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   success_callback: ['langfuse']
-  max_budget: 10 # global budget for proxy
+  max_budget: 0.025 # global budget for proxy
   budget_duration: 30d # global budget duration, will reset after 30d
   # cache: True
 # setting callback class

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2eb6332092ac..2d8b0e6621d3 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -50,9 +50,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Int      @default(0)
-  prompt_tokens     Int      @default(0)
-  completion_tokens Int      @default(0)
+  total_tokens      Float    @default(0.0)
+  prompt_tokens     Float    @default(0.0)
+  completion_tokens Float    @default(0.0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/schema.prisma b/schema.prisma
index 0882c650c81b..103186aaed69 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -53,9 +53,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Int      @default(0)
-  prompt_tokens     Int      @default(0)
-  completion_tokens Int      @default(0)
+  total_tokens      Float    @default(0.0)
+  prompt_tokens     Float    @default(0.0)
+  completion_tokens Float    @default(0.0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/tests/test_keys.py b/tests/test_keys.py
index a296ef13eb36..348be63af3f0 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -281,20 +281,14 @@ async def test_key_info_spend_values():
         await asyncio.sleep(5)
         spend_logs = await get_spend_logs(session=session, request_id=response["id"])
         print(f"spend_logs: {spend_logs}")
-        completion_tokens = spend_logs[0]["completion_tokens"]
-        prompt_tokens = spend_logs[0]["prompt_tokens"]
-        print(f"prompt_tokens: {prompt_tokens}; completion_tokens: {completion_tokens}")
-
-        litellm.set_verbose = True
+        usage = spend_logs[0]["usage"]
         prompt_cost, completion_cost = litellm.cost_per_token(
             model="gpt-35-turbo",
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
+            prompt_tokens=usage["prompt_tokens"],
+            completion_tokens=usage["completion_tokens"],
             custom_llm_provider="azure",
         )
-        print("prompt_cost: ", prompt_cost, "completion_cost: ", completion_cost)
         response_cost = prompt_cost + completion_cost
-        print(f"response_cost: {response_cost}")
         await asyncio.sleep(5)  # allow db log to be updated
         key_info = await get_key_info(session=session, get_key=key, call_key=key)
         print(

From 65fd405bd48d7aaacbad83eb7137863969336d95 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 13:33:11 -0800
Subject: [PATCH 7/7] (docs) dimensions embedding param

---
 .../docs/embedding/supported_embedding.md | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/embedding/supported_embedding.md b/docs/my-website/docs/embedding/supported_embedding.md
index 735aa01c864a..d864c5796c05 100644
--- a/docs/my-website/docs/embedding/supported_embedding.md
+++ b/docs/my-website/docs/embedding/supported_embedding.md
@@ -13,8 +13,8 @@ response = embedding(model='text-embedding-ada-002', input=["good morning from l
 
 - `model`: *string* - ID of the model to use. `model='text-embedding-ada-002'`
 
-- `input`: *array* - Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for text-embedding-ada-002), cannot be an empty string, and any array must be 2048 dimensions or less.
-```
+- `input`: *string or array* - Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for text-embedding-ada-002), cannot be an empty string, and any array must be 2048 dimensions or less.
+```python
 input=["good morning from litellm"]
 ```
 
 - `user`: *string (optional)* - A unique identifier representing your end-user, which can help OpenAI monitor and detect abuse.
 
-- `timeout`: *integer* - The maximum time, in seconds, to wait for the API to respond. Defaults to 600 seconds (10 minutes).
+- `dimensions`: *integer (optional)* - The number of dimensions the resulting output embeddings should have. Only supported in OpenAI/Azure text-embedding-3 and later models.
+
+- `encoding_format`: *string (optional)* - The format to return the embeddings in. Can be either `"float"` or `"base64"`. Defaults to `encoding_format="float"`.
+
+- `timeout`: *integer (optional)* - The maximum time, in seconds, to wait for the API to respond. Defaults to 600 seconds (10 minutes).
 
 - `api_base`: *string (optional)* - The api endpoint you want to call the model with
@@ -66,7 +70,12 @@ input=["good morning from litellm"]
 from litellm import embedding
 import os
 os.environ['OPENAI_API_KEY'] = ""
-response = embedding('text-embedding-ada-002', input=["good morning from litellm"])
+response = embedding(
+    model="text-embedding-3-small",
+    input=["good morning from litellm", "this is another item"],
+    metadata={"anything": "good day"},
+    dimensions=5 # Only supported in text-embedding-3 and later models.
+)
 ```
 
| Model Name | Function Call | Required OS Variables |
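For reference, a minimal sketch of the validation path added in PATCH 4: as written there, passing `dimensions` to an OpenAI embedding model whose name does not contain `text-embedding-3` raises `UnsupportedParamsError`. Model names below are illustrative, and an `OPENAI_API_KEY` is assumed:

```python
from litellm import embedding

# Supported: text-embedding-3 and later models accept `dimensions`.
response = embedding(
    model="text-embedding-3-small",
    input=["good morning from litellm"],
    dimensions=5,
)

# Unsupported: an older embedding model with `dimensions` set trips the
# check added to get_optional_params_embeddings in PATCH 4.
try:
    embedding(
        model="text-embedding-ada-002",
        input=["good morning from litellm"],
        dimensions=5,
    )
except Exception as err:  # UnsupportedParamsError
    print(err)
```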