Skip to content

Commit

Permalink
Merge pull request #1802 from BerriAI/litellm_vertex_ai_high_traffic_fix
Browse files Browse the repository at this point in the history
fix(vertex_ai.py): treat vertex ai high-traffic error as a rate limit error - allows user-controlled backoff logic to work here
  • Loading branch information
krrishdholakia authored Feb 3, 2024
2 parents 6eb616c + e617ef9 commit 1d241b4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
5 changes: 2 additions & 3 deletions litellm/tests/test_amazing_vertex_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,7 @@ def test_vertex_ai():
# litellm.vertex_project = "reliablekeys"

test_models = random.sample(test_models, 1)
# test_models += litellm.vertex_language_models # always test gemini-pro
test_models = litellm.vertex_language_models # always test gemini-pro
test_models += litellm.vertex_language_models # always test gemini-pro
for model in test_models:
try:
if model in [
Expand Down Expand Up @@ -374,7 +373,7 @@ async def gemini_pro_async_function_calling():
print(f"completion: {completion}")


asyncio.run(gemini_pro_async_function_calling())
# asyncio.run(gemini_pro_async_function_calling())

# Extra gemini Vision tests for completion + stream, async, async + stream
# if we run into issues with gemini, we will also add these to our ci/cd pipeline
Expand Down
17 changes: 17 additions & 0 deletions litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6468,6 +6468,23 @@ def exception_type(
llm_provider="vertex_ai",
response=original_exception.response,
)
elif (
"429 Quota exceeded" in error_str
or "IndexError: list index out of range"
):
exception_mapping_worked = True
raise RateLimitError(
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
response=httpx.Response(
status_code=429,
request=httpx.Request(
method="POST",
url=" https://cloud.google.com/vertex-ai/",
),
),
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 400:
exception_mapping_worked = True
Expand Down

0 comments on commit 1d241b4

Please sign in to comment.