From 8425a8ba222b77bae2e390ae3438c1214a5d1872 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 7 Feb 2024 19:21:50 -0800
Subject: [PATCH 1/2] (fix) track cost for semantic_caching, place on langfuse
 trace

---
 litellm/caching.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/litellm/caching.py b/litellm/caching.py
index f996a587354b..3522a9d436eb 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -427,10 +427,16 @@ async def async_set_cache(self, key, value, **kwargs):
             else []
         )
         if llm_router is not None and self.embedding_model in router_model_names:
+            user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
             embedding_response = await llm_router.aembedding(
                 model=self.embedding_model,
                 input=prompt,
                 cache={"no-store": True, "no-cache": True},
+                metadata={
+                    "user_api_key": user_api_key,
+                    "semantic-cache-embedding": True,
+                    "trace_id": kwargs.get("metadata", {}).get("trace_id", None),
+                },
             )
         else:
             # convert to embedding
@@ -476,13 +482,20 @@ async def async_get_cache(self, key, **kwargs):
             else []
         )
         if llm_router is not None and self.embedding_model in router_model_names:
+            user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
             embedding_response = await llm_router.aembedding(
                 model=self.embedding_model,
                 input=prompt,
                 cache={"no-store": True, "no-cache": True},
+                metadata={
+                    "user_api_key": user_api_key,
+                    "semantic-cache-embedding": True,
+                    "trace_id": kwargs.get("metadata", {}).get("trace_id", None),
+                },
             )
         else:
             # convert to embedding
+            user_api_key = kwargs["litellm_params"]["metadata"].get("user_api_key", "")
             embedding_response = await litellm.aembedding(
                 model=self.embedding_model,
                 input=prompt,

From 8197b3de0a11cb02db67128d0cdeb8aad5e0d04b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 7 Feb 2024 19:24:27 -0800
Subject: [PATCH 2/2] (fix) remove extra statement

---
 litellm/caching.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 3522a9d436eb..f0ae7778af9d 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -495,7 +495,6 @@ async def async_get_cache(self, key, **kwargs):
             )
         else:
             # convert to embedding
-            user_api_key = kwargs["litellm_params"]["metadata"].get("user_api_key", "")
             embedding_response = await litellm.aembedding(
                 model=self.embedding_model,
                 input=prompt,
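Both hunks in patch 1/2 apply the same pattern: read the caller's `user_api_key` and `trace_id` out of the incoming request metadata and forward them on the semantic cache's internal embedding call, so the embedding spend is attributed to the originating API key and nested under the caller's Langfuse trace instead of appearing as an orphaned call. Below is a minimal standalone sketch of that metadata propagation, assuming the same kwargs shape the patch reads from; the `build_embedding_metadata` helper name is hypothetical and not part of litellm:

from typing import Any, Dict


def build_embedding_metadata(request_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Hypothetical helper mirroring the metadata dict the patch forwards.

    Pulls the caller's `user_api_key` and `trace_id` out of the request
    kwargs and tags the cache's internal embedding call so a logger
    (e.g. Langfuse) can attribute its cost to the original trace and key.
    """
    caller_metadata = request_kwargs.get("metadata", {}) or {}
    return {
        # attribute spend to the API key that triggered the cache lookup
        "user_api_key": caller_metadata.get("user_api_key", ""),
        # flag this as an internal semantic-cache embedding, not a user call
        "semantic-cache-embedding": True,
        # reuse the caller's trace id so the cost nests under their trace
        "trace_id": caller_metadata.get("trace_id", None),
    }


if __name__ == "__main__":
    # Example: metadata as it might arrive from the proxy layer.
    incoming = {"metadata": {"user_api_key": "sk-...", "trace_id": "trace-123"}}
    print(build_embedding_metadata(incoming))

Patch 2/2 then drops the `user_api_key` assignment from the non-router (`litellm.aembedding`) branch of `async_get_cache`, since that branch never forwards a metadata dict and the variable was left unused there.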