Commit b2c45f8

ankursharmas authored and copybara-github committed
chore: Enhance the messaging with possible fixes for RESOURCE_EXHAUSTED errors from Gemini
Co-authored-by: Ankur Sharma <ankusharma@google.com>
PiperOrigin-RevId: 833538475
1 parent 5ac5129 commit b2c45f8
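In practical terms, a 429 (RESOURCE_EXHAUSTED) error raised by the underlying google.genai client is now re-raised as a ClientError subclass whose string form is prefixed with a link to mitigation guidance. A minimal caller-side sketch of that behavior follows; the model name, request contents, and event-loop wiring are illustrative assumptions rather than part of this commit.

import asyncio

from google.adk.models.google_llm import Gemini
from google.adk.models.llm_request import LlmRequest
from google.genai import types
from google.genai.errors import ClientError


async def main():
  # Illustrative setup; any Gemini model and request would do.
  llm = Gemini(model='gemini-2.0-flash')
  request = LlmRequest(
      model='gemini-2.0-flash',
      contents=[
          types.Content(role='user', parts=[types.Part.from_text(text='Hi')])
      ],
  )
  try:
    async for response in llm.generate_content_async(request, stream=False):
      print(response)
  except ClientError as e:
    # With this change, a 429 arrives as a ClientError subclass whose str()
    # begins with a pointer to
    # https://google.github.io/adk-docs/agents/models/#error-code-429-resource_exhausted
    print(e)


if __name__ == '__main__':
  asyncio.run(main())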

2 files changed: 137 additions & 40 deletions

src/google/adk/models/google_llm.py

Lines changed: 80 additions & 40 deletions
@@ -28,6 +28,7 @@
 from typing import Union

 from google.genai import types
+from google.genai.errors import ClientError
 from typing_extensions import override

 from .. import version
@@ -51,6 +52,34 @@
 _AGENT_ENGINE_TELEMETRY_TAG = 'remote_reasoning_engine'
 _AGENT_ENGINE_TELEMETRY_ENV_VARIABLE_NAME = 'GOOGLE_CLOUD_AGENT_ENGINE_ID'

+_RESOURCE_EXHAUSTED_POSSIBLE_FIX_MESSAGE = """
+On how to mitigate this issue, please refer to:
+
+https://google.github.io/adk-docs/agents/models/#error-code-429-resource_exhausted
+"""
+
+
+class _ResourceExhaustedError(ClientError):
+  """Represents a resource exhausted error received from the model."""
+
+  def __init__(
+      self,
+      client_error: ClientError,
+  ):
+    super().__init__(
+        code=client_error.code,
+        response_json=client_error.details,
+        response=client_error.response,
+    )
+
+  def __str__(self):
+    # We don't get to override the actual message on ClientError, so we
+    # override this method instead. This ensures that when the exception is
+    # stringified (whether printed to the console or written to logs), we
+    # include the required details for the developer.
+    base_message = super().__str__()
+    return f'{_RESOURCE_EXHAUSTED_POSSIBLE_FIX_MESSAGE}\n\n{base_message}'
+

 class Gemini(BaseLlm):
   """Integration for Gemini models.
@@ -149,50 +178,61 @@ async def generate_content_async(
         llm_request.config.http_options.headers
     )

-    if stream:
-      responses = await self.api_client.aio.models.generate_content_stream(
-          model=llm_request.model,
-          contents=llm_request.contents,
-          config=llm_request.config,
-      )
+    try:
+      if stream:
+        responses = await self.api_client.aio.models.generate_content_stream(
+            model=llm_request.model,
+            contents=llm_request.contents,
+            config=llm_request.config,
+        )
+
+        # for sse, similar as bidi (see receive method in
+        # gemini_llm_connection.py), we need to mark those text content as
+        # partial and after all partial contents are sent, we send an
+        # accumulated event which contains all the previous partial content. The
+        # only difference is bidi rely on complete_turn flag to detect end while
+        # sse depends on finish_reason.
+        aggregator = StreamingResponseAggregator()
+        async with Aclosing(responses) as agen:
+          async for response in agen:
+            logger.debug(_build_response_log(response))
+            async with Aclosing(
+                aggregator.process_response(response)
+            ) as aggregator_gen:
+              async for llm_response in aggregator_gen:
+                yield llm_response
+        if (close_result := aggregator.close()) is not None:
+          # Populate cache metadata in the final aggregated response for
+          # streaming
+          if cache_metadata:
+            cache_manager.populate_cache_metadata_in_response(
+                close_result, cache_metadata
+            )
+          yield close_result
+
+      else:
+        response = await self.api_client.aio.models.generate_content(
+            model=llm_request.model,
+            contents=llm_request.contents,
+            config=llm_request.config,
+        )
+        logger.info('Response received from the model.')
+        logger.debug(_build_response_log(response))

-      # for sse, similar as bidi (see receive method in gemini_llm_connection.py),
-      # we need to mark those text content as partial and after all partial
-      # contents are sent, we send an accumulated event which contains all the
-      # previous partial content. The only difference is bidi rely on
-      # complete_turn flag to detect end while sse depends on finish_reason.
-      aggregator = StreamingResponseAggregator()
-      async with Aclosing(responses) as agen:
-        async for response in agen:
-          logger.debug(_build_response_log(response))
-          async with Aclosing(
-              aggregator.process_response(response)
-          ) as aggregator_gen:
-            async for llm_response in aggregator_gen:
-              yield llm_response
-      if (close_result := aggregator.close()) is not None:
-        # Populate cache metadata in the final aggregated response for streaming
+        llm_response = LlmResponse.create(response)
         if cache_metadata:
           cache_manager.populate_cache_metadata_in_response(
-              close_result, cache_metadata
+              llm_response, cache_metadata
           )
-        yield close_result
-
-    else:
-      response = await self.api_client.aio.models.generate_content(
-          model=llm_request.model,
-          contents=llm_request.contents,
-          config=llm_request.config,
-      )
-      logger.info('Response received from the model.')
-      logger.debug(_build_response_log(response))
-
-      llm_response = LlmResponse.create(response)
-      if cache_metadata:
-        cache_manager.populate_cache_metadata_in_response(
-            llm_response, cache_metadata
-        )
-      yield llm_response
+        yield llm_response
+    except ClientError as ce:
+      if ce.code == 429:
+        # We expect a Resource Exhausted error to be a common client error
+        # that developers run into, so we enhance the messaging with possible
+        # fixes for this issue.
+        raise _ResourceExhaustedError(ce) from ce
+
+      raise ce

   @cached_property
   def api_client(self) -> Client:
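The wrapping behavior above can be exercised in isolation. A small sketch, mirroring the constructor call used in the unit tests below; note that _ResourceExhaustedError is a private helper of google.adk.models.google_llm and is imported here purely for illustration.

from google.adk.models.google_llm import _ResourceExhaustedError
from google.genai.errors import ClientError

# Build a bare 429 client error the same way the tests do, then wrap it.
err = ClientError(code=429, response_json={})
wrapped = _ResourceExhaustedError(err)

# str(wrapped) is the original ClientError message prefixed with the
# mitigation pointer defined in _RESOURCE_EXHAUSTED_POSSIBLE_FIX_MESSAGE.
print(str(wrapped))
assert 'error-code-429-resource_exhausted' in str(wrapped)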

tests/unittests/models/test_google_llm.py

Lines changed: 57 additions & 0 deletions
@@ -26,11 +26,14 @@
 from google.adk.models.google_llm import _AGENT_ENGINE_TELEMETRY_TAG
 from google.adk.models.google_llm import _build_function_declaration_log
 from google.adk.models.google_llm import _build_request_log
+from google.adk.models.google_llm import _RESOURCE_EXHAUSTED_POSSIBLE_FIX_MESSAGE
+from google.adk.models.google_llm import _ResourceExhaustedError
 from google.adk.models.google_llm import Gemini
 from google.adk.models.llm_request import LlmRequest
 from google.adk.models.llm_response import LlmResponse
 from google.adk.utils.variant_utils import GoogleLLMVariant
 from google.genai import types
+from google.genai.errors import ClientError
 from google.genai.types import Content
 from google.genai.types import Part
 import pytest
@@ -386,6 +389,60 @@ async def mock_coro():
     mock_client.aio.models.generate_content_stream.assert_called_once()


+@pytest.mark.parametrize("stream", [True, False])
+@pytest.mark.asyncio
+async def test_generate_content_async_resource_exhausted_error(
+    stream, gemini_llm, llm_request
+):
+  with mock.patch.object(gemini_llm, "api_client") as mock_client:
+    err = ClientError(code=429, response_json={})
+    err.code = 429
+    if stream:
+      mock_client.aio.models.generate_content_stream.side_effect = err
+    else:
+      mock_client.aio.models.generate_content.side_effect = err
+
+    with pytest.raises(_ResourceExhaustedError) as excinfo:
+      responses = []
+      async for resp in gemini_llm.generate_content_async(
+          llm_request, stream=stream
+      ):
+        responses.append(resp)
+    assert _RESOURCE_EXHAUSTED_POSSIBLE_FIX_MESSAGE in str(excinfo.value)
+    assert excinfo.value.code == 429
+    if stream:
+      mock_client.aio.models.generate_content_stream.assert_called_once()
+    else:
+      mock_client.aio.models.generate_content.assert_called_once()
+
+
+@pytest.mark.parametrize("stream", [True, False])
+@pytest.mark.asyncio
+async def test_generate_content_async_other_client_error(
+    stream, gemini_llm, llm_request
+):
+  with mock.patch.object(gemini_llm, "api_client") as mock_client:
+    err = ClientError(code=500, response_json={})
+    err.code = 500
+    if stream:
+      mock_client.aio.models.generate_content_stream.side_effect = err
+    else:
+      mock_client.aio.models.generate_content.side_effect = err
+
+    with pytest.raises(ClientError) as excinfo:
+      responses = []
+      async for resp in gemini_llm.generate_content_async(
+          llm_request, stream=stream
+      ):
+        responses.append(resp)
+    assert excinfo.value.code == 500
+    assert not isinstance(excinfo.value, _ResourceExhaustedError)
+    if stream:
+      mock_client.aio.models.generate_content_stream.assert_called_once()
+    else:
+      mock_client.aio.models.generate_content.assert_called_once()
+
+
 @pytest.mark.asyncio
 async def test_connect(gemini_llm, llm_request):
   # Create a mock connection
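Assuming a standard development checkout with pytest and pytest-asyncio available, the two new parametrized tests can presumably be selected with a keyword filter, for example: pytest tests/unittests/models/test_google_llm.py -k "resource_exhausted or other_client_error".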
