From 594666eb614026035d982c7cd12a30f6a284c90b Mon Sep 17 00:00:00 2001
From: Shota Totsuka <153569547+totsukash@users.noreply.github.com>
Date: Sat, 30 Nov 2024 18:30:55 +0900
Subject: [PATCH] fix: use Gemini response metadata for token counting (#11226)

---
 api/core/model_runtime/model_providers/google/llm/llm.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py
index 77e0801b63ad8a..c19e860d2e4b4b 100644
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@@ -254,8 +254,12 @@ def _handle_generate_response(
         assistant_prompt_message = AssistantPromptMessage(content=response.text)
 
         # calculate num tokens
-        prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
-        completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+        if response.usage_metadata:
+            prompt_tokens = response.usage_metadata.prompt_token_count
+            completion_tokens = response.usage_metadata.candidates_token_count
+        else:
+            prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
+            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
 
         # transform usage
         usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
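
Note (not part of the patch): the change prefers the token counts reported by the Gemini API itself over a local estimate from get_num_tokens. A minimal sketch of where those counts come from, assuming the google-generativeai client used outside Dify's runtime wrappers; model name and prompt are placeholders:

    import google.generativeai as genai

    genai.configure(api_key="YOUR_API_KEY")  # placeholder credential
    model = genai.GenerativeModel("gemini-1.5-pro")
    response = model.generate_content("Hello, Gemini")

    # usage_metadata carries the counts computed server-side for this request,
    # so no local tokenizer estimate is needed when it is present.
    if response.usage_metadata:
        prompt_tokens = response.usage_metadata.prompt_token_count
        completion_tokens = response.usage_metadata.candidates_token_count
    else:
        # fall back to a local estimate, as the patched code does
        prompt_tokens = completion_tokens = None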