From c8da57710f88282f73bc910ef4169d3d31200d6c Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:34:16 -0800
Subject: [PATCH 1/7] (chore) bump poetry lock

---
 poetry.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 7e58f02bc862..44e21dd7674f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1158,13 +1158,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.8.0"
+version = "1.10.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.8.0-py3-none-any.whl", hash = "sha256:0f8f53805826103fdd8adaf379ad3ec23f9d867e698cbc14caf34b778d150175"},
-    {file = "openai-1.8.0.tar.gz", hash = "sha256:93366be27802f517e89328801913d2a5ede45e3b86fdcab420385b8a1b88c767"},
+    {file = "openai-1.10.0-py3-none-any.whl", hash = "sha256:aa69e97d0223ace9835fbf9c997abe9ee95318f684fd2de6d02c870700c71ebc"},
+    {file = "openai-1.10.0.tar.gz", hash = "sha256:208886cb501b930dc63f48d51db9c15e5380380f80516d07332adad67c9f1053"},
 ]
 
 [package.dependencies]

From 17370dc50fab5586b25c7a26f09545cadf39d6ff Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:37:01 -0800
Subject: [PATCH 2/7] (test) dimension param - openai

---
 litellm/tests/test_embedding.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 42ac6f7f9d3c..a005a6ad1688 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -64,7 +64,9 @@ def test_openai_embedding_3():
             model="text-embedding-3-small",
             input=["good morning from litellm", "this is another item"],
             metadata={"anything": "good day"},
+            dimensions=5,
         )
+        print(f"response: {response}")
         litellm_response = dict(response)
         litellm_response_keys = set(litellm_response.keys())
         litellm_response_keys.discard("_response_ms")
@@ -80,6 +82,7 @@ def test_openai_embedding_3():
         response = client.embeddings.create(
             model="text-embedding-3-small",
             input=["good morning from litellm", "this is another item"],
+            dimensions=5,
         )
 
         response = dict(response)

From 479add6b96b959955c9c0c376d3f106724b31629 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:54:34 -0800
Subject: [PATCH 3/7] (feat) add support for dimensions param

---
 litellm/main.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/litellm/main.py b/litellm/main.py
index f9f1139f69b6..929b80ee0ad5 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2224,6 +2224,7 @@ def embedding(
     model,
     input=[],
     # Optional params
+    dimensions: Optional[int] = None,
     timeout=600,  # default to 10 minutes
     # set api_base, api_version, api_key
     api_base: Optional[str] = None,
@@ -2244,6 +2245,7 @@ def embedding(
     Parameters:
     - model: The embedding model to use.
     - input: The input for which embeddings are to be generated.
+    - dimensions: The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
    - timeout: The timeout value for the API call, default 10 mins
    - litellm_call_id: The call ID for litellm logging.
    - litellm_logging_obj: The litellm logging object.
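A minimal sketch of how the new `dimensions` kwarg travels through `litellm.embedding` — it mirrors `test_openai_embedding_3` from PATCH 2 and is not part of the patch itself; it assumes this series is applied and `OPENAI_API_KEY` is set:

```python
import litellm

# Per the docstring added above, `dimensions` is only honored by
# text-embedding-3 and later models.
response = litellm.embedding(
    model="text-embedding-3-small",
    input=["good morning from litellm"],
    dimensions=5,
)
print(len(response.data[0]["embedding"]))  # expected: 5
```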
@@ -2277,6 +2279,7 @@ def embedding(
     output_cost_per_second = kwargs.get("output_cost_per_second", None)
     openai_params = [
         "user",
+        "dimensions",
         "request_timeout",
         "api_base",
         "api_version",
@@ -2345,7 +2348,9 @@ def embedding(
         api_key=api_key,
     )
     optional_params = get_optional_params_embeddings(
+        model=model,
         user=user,
+        dimensions=dimensions,
         encoding_format=encoding_format,
         custom_llm_provider=custom_llm_provider,
         **non_default_params,

From 0fc8876ea2678195045c6e0bd622e775c28c18f4 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 10:55:38 -0800
Subject: [PATCH 4/7] (feat) support dimensions param

---
 litellm/utils.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index b0e48bbc6e23..d1611b075de0 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3313,8 +3313,10 @@ def get_optional_params_image_gen(
 
 
 def get_optional_params_embeddings(  # 2 optional params
+    model=None,
     user=None,
     encoding_format=None,
+    dimensions=None,
     custom_llm_provider="",
     **kwargs,
 ):
@@ -3325,7 +3327,7 @@ def get_optional_params_embeddings(
     for k, v in special_params.items():
         passed_params[k] = v
 
-    default_params = {"user": None, "encoding_format": None}
+    default_params = {"user": None, "encoding_format": None, "dimensions": None}
 
     non_default_params = {
         k: v
@@ -3333,6 +3335,19 @@ def get_optional_params_embeddings(
         if (k in default_params and v != default_params[k])
     }
     ## raise exception if non-default value passed for non-openai/azure embedding calls
+    if custom_llm_provider == "openai":
+        # `dimensions` is only supported in `text-embedding-3` and later models
+
+        if (
+            model is not None
+            and "text-embedding-3" not in model
+            and "dimensions" in non_default_params.keys()
+        ):
+            raise UnsupportedParamsError(
+                status_code=500,
+                message=f"Setting dimensions is only supported for OpenAI `text-embedding-3` and later models. To drop it from the call, set `litellm.drop_params = True`.",
+            )
+
     if (
         custom_llm_provider != "openai"
        and custom_llm_provider != "azure"

From 2a1104d1cfef826e18637234e7c2f983d4b06826 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 13:18:27 -0800
Subject: [PATCH 5/7] (fix) SpendLogs Table

---
 litellm/proxy/_types.py         |  6 +++---
 litellm/proxy/proxy_config.yaml |  8 +++++++-
 litellm/proxy/schema.prisma     |  6 +++---
 schema.prisma                   |  6 +++---
 tests/test_keys.py              | 12 +++++++++---
 5 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index a3d1b4815d1d..9a5acc440641 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -346,9 +346,9 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     model: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
-    total_tokens: Optional[float] = 0.0
-    prompt_tokens: Optional[float] = 0.0
-    completion_tokens: Optional[float] = 0.0
+    total_tokens: Optional[int] = 0
+    prompt_tokens: Optional[int] = 0
+    completion_tokens: Optional[int] = 0
     startTime: Union[str, datetime, None]
     endTime: Union[str, datetime, None]
     user: Optional[str] = ""

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 7cb2714f4207..aa950c035033 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,6 +11,12 @@ model_list:
       output_cost_per_token: 0.00003
       max_tokens: 4096
       base_model: gpt-3.5-turbo
+  - model_name: gpt-4
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-vision
     litellm_params:
       model: azure/gpt-4-vision
@@ -61,7 +67,7 @@ model_list:
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   success_callback: ['langfuse']
-  max_budget: 0.025 # global budget for proxy
+  max_budget: 10 # global budget for proxy
   budget_duration: 30d # global budget duration, will reset after 30d
   # cache: True
 # setting callback class

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2d8b0e6621d3..2eb6332092ac 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -50,9 +50,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Float    @default(0.0)
-  prompt_tokens     Float    @default(0.0)
-  completion_tokens Float    @default(0.0)
+  total_tokens      Int      @default(0)
+  prompt_tokens     Int      @default(0)
+  completion_tokens Int      @default(0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/schema.prisma b/schema.prisma
index 103186aaed69..0882c650c81b 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -53,9 +53,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Float    @default(0.0)
-  prompt_tokens     Float    @default(0.0)
-  completion_tokens Float    @default(0.0)
+  total_tokens      Int      @default(0)
+  prompt_tokens     Int      @default(0)
+  completion_tokens Int      @default(0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/tests/test_keys.py b/tests/test_keys.py
index 348be63af3f0..a296ef13eb36 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -281,14 +281,20 @@ async def test_key_info_spend_values():
         await asyncio.sleep(5)
         spend_logs = await get_spend_logs(session=session, request_id=response["id"])
         print(f"spend_logs: {spend_logs}")
-        usage = spend_logs[0]["usage"]
+        completion_tokens = spend_logs[0]["completion_tokens"]
+        prompt_tokens = spend_logs[0]["prompt_tokens"]
+        print(f"prompt_tokens: {prompt_tokens}; completion_tokens: {completion_tokens}")
+
+        litellm.set_verbose = True
         prompt_cost, completion_cost = litellm.cost_per_token(
             model="gpt-35-turbo",
-            prompt_tokens=usage["prompt_tokens"],
-            completion_tokens=usage["completion_tokens"],
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
             custom_llm_provider="azure",
         )
+        print("prompt_cost: ", prompt_cost, "completion_cost: ", completion_cost)
         response_cost = prompt_cost + completion_cost
+        print(f"response_cost: {response_cost}")
         await asyncio.sleep(5)  # allow db log to be updated
         key_info = await get_key_info(session=session, get_key=key, call_key=key)
         print(

From 273e6d190565c01f0c121918260d9518d1e60e5e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 13:26:49 -0800
Subject: [PATCH 6/7] Revert "(fix) SpendLogs Table"

This reverts commit 2a1104d1cfef826e18637234e7c2f983d4b06826.

---
 litellm/proxy/_types.py         |  6 +++---
 litellm/proxy/proxy_config.yaml |  8 +-------
 litellm/proxy/schema.prisma     |  6 +++---
 schema.prisma                   |  6 +++---
 tests/test_keys.py              | 12 +++---------
 5 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 9a5acc440641..a3d1b4815d1d 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -346,9 +346,9 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     model: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
-    total_tokens: Optional[int] = 0
-    prompt_tokens: Optional[int] = 0
-    completion_tokens: Optional[int] = 0
+    total_tokens: Optional[float] = 0.0
+    prompt_tokens: Optional[float] = 0.0
+    completion_tokens: Optional[float] = 0.0
     startTime: Union[str, datetime, None]
     endTime: Union[str, datetime, None]
     user: Optional[str] = ""

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index aa950c035033..7cb2714f4207 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,12 +11,6 @@ model_list:
       output_cost_per_token: 0.00003
       max_tokens: 4096
       base_model: gpt-3.5-turbo
-  - model_name: gpt-4
-    litellm_params:
-      model: azure/chatgpt-v-2
-      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
-      api_version: "2023-05-15"
-      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-vision
     litellm_params:
       model: azure/gpt-4-vision
@@ -67,7 +61,7 @@ model_list:
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   success_callback: ['langfuse']
-  max_budget: 10 # global budget for proxy
+  max_budget: 0.025 # global budget for proxy
   budget_duration: 30d # global budget duration, will reset after 30d
   # cache: True
 # setting callback class

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2eb6332092ac..2d8b0e6621d3 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -50,9 +50,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Int      @default(0)
-  prompt_tokens     Int      @default(0)
-  completion_tokens Int      @default(0)
+  total_tokens      Float    @default(0.0)
+  prompt_tokens     Float    @default(0.0)
+  completion_tokens Float    @default(0.0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/schema.prisma b/schema.prisma
index 0882c650c81b..103186aaed69 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -53,9 +53,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Int      @default(0)
-  prompt_tokens     Int      @default(0)
-  completion_tokens Int      @default(0)
+  total_tokens      Float    @default(0.0)
+  prompt_tokens     Float    @default(0.0)
+  completion_tokens Float    @default(0.0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")

diff --git a/tests/test_keys.py b/tests/test_keys.py
index a296ef13eb36..348be63af3f0 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -281,20 +281,14 @@ async def test_key_info_spend_values():
         await asyncio.sleep(5)
         spend_logs = await get_spend_logs(session=session, request_id=response["id"])
         print(f"spend_logs: {spend_logs}")
-        completion_tokens = spend_logs[0]["completion_tokens"]
-        prompt_tokens = spend_logs[0]["prompt_tokens"]
-        print(f"prompt_tokens: {prompt_tokens}; completion_tokens: {completion_tokens}")
-
-        litellm.set_verbose = True
+        usage = spend_logs[0]["usage"]
         prompt_cost, completion_cost = litellm.cost_per_token(
             model="gpt-35-turbo",
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
+            prompt_tokens=usage["prompt_tokens"],
+            completion_tokens=usage["completion_tokens"],
             custom_llm_provider="azure",
         )
-        print("prompt_cost: ", prompt_cost, "completion_cost: ", completion_cost)
         response_cost = prompt_cost + completion_cost
-        print(f"response_cost: {response_cost}")
         await asyncio.sleep(5)  # allow db log to be updated
         key_info = await get_key_info(session=session, get_key=key, call_key=key)
         print(

From 65fd405bd48d7aaacbad83eb7137863969336d95 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 26 Jan 2024 13:33:11 -0800
Subject: [PATCH 7/7] (docs) dimensions embedding param

---
 .../docs/embedding/supported_embedding.md | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/embedding/supported_embedding.md b/docs/my-website/docs/embedding/supported_embedding.md
index 735aa01c864a..d864c5796c05 100644
--- a/docs/my-website/docs/embedding/supported_embedding.md
+++ b/docs/my-website/docs/embedding/supported_embedding.md
@@ -13,8 +13,8 @@ response = embedding(model='text-embedding-ada-002', input=["good morning from l
 
 - `model`: *string* - ID of the model to use. `model='text-embedding-ada-002'`
 
-- `input`: *array* - Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for text-embedding-ada-002), cannot be an empty string, and any array must be 2048 dimensions or less.
-```
+- `input`: *string or array* - Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for text-embedding-ada-002), cannot be an empty string, and any array must be 2048 dimensions or less.
+```python
 input=["good morning from litellm"]
 ```
 
 - `user`: *string (optional)* - A unique identifier representing your end-user, which can help OpenAI monitor and detect abuse.
 
-- `timeout`: *integer* - The maximum time, in seconds, to wait for the API to respond. Defaults to 600 seconds (10 minutes).
+- `dimensions`: *integer (optional)* - The number of dimensions the resulting output embeddings should have. Only supported in OpenAI/Azure text-embedding-3 and later models.
+
+- `encoding_format`: *string (optional)* - The format to return the embeddings in. Can be either `"float"` or `"base64"`. Defaults to `encoding_format="float"`.
+
+- `timeout`: *integer (optional)* - The maximum time, in seconds, to wait for the API to respond. Defaults to 600 seconds (10 minutes).
 
 - `api_base`: *string (optional)* - The api endpoint you want to call the model with
@@ -66,7 +70,12 @@ input=["good morning from litellm"]
 from litellm import embedding
 import os
 os.environ['OPENAI_API_KEY'] = ""
-response = embedding('text-embedding-ada-002', input=["good morning from litellm"])
+response = embedding(
+    model="text-embedding-3-small",
+    input=["good morning from litellm", "this is another item"],
+    metadata={"anything": "good day"},
+    dimensions=5 # Only supported in text-embedding-3 and later models.
+)
 ```
 
| Model Name | Function Call | Required OS Variables |
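For reference, a minimal sketch of the validation path added in PATCH 4: as written there, passing `dimensions` to an OpenAI embedding model whose name does not contain `text-embedding-3` raises `UnsupportedParamsError`. Model names below are illustrative, and an `OPENAI_API_KEY` is assumed:

```python
from litellm import embedding

# Supported: text-embedding-3 and later models accept `dimensions`.
response = embedding(
    model="text-embedding-3-small",
    input=["good morning from litellm"],
    dimensions=5,
)

# Unsupported: an older embedding model with `dimensions` set trips the
# check added to get_optional_params_embeddings in PATCH 4.
try:
    embedding(
        model="text-embedding-ada-002",
        input=["good morning from litellm"],
        dimensions=5,
    )
except Exception as err:  # UnsupportedParamsError
    print(err)
```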