Fix Bedrock throttling error mapping to match OpenAI format #6973

Closed · wants to merge 2 commits
3 changes: 3 additions & 0 deletions litellm/litellm_core_utils/exception_mapping_utils.py
@@ -797,6 +797,9 @@ def exception_type(  # type: ignore # noqa: PLR0915
message=f"BedrockException: Rate Limit Error - {error_str}",
model=model,
llm_provider="bedrock",
type="throttling_error",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't we just move this type to happen in RateLimitError init (inside exceptions.py -

class RateLimitError(openai.RateLimitError): # type: ignore

param=None,
code=429,
response=getattr(original_exception, "response", None),
)
elif (
Expand Down
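
A minimal sketch of what that suggestion could look like, assuming litellm's RateLimitError wraps openai.RateLimitError roughly as below; the parameter list and the synthetic-response fallback are assumptions for illustration, not the actual exceptions.py code:

import httpx
import openai


class RateLimitError(openai.RateLimitError):  # type: ignore
    def __init__(self, message, llm_provider, model, response=None):
        self.status_code = 429
        self.message = message
        self.llm_provider = llm_provider
        self.model = model
        # Setting the OpenAI-compatible error type once here would replace
        # the per-call-site type="throttling_error" in exception_mapping_utils.py.
        self.type = "throttling_error"
        if response is None:
            # Hypothetical fallback: openai.RateLimitError requires an
            # httpx.Response, so synthesize a bare 429 when none is supplied.
            response = httpx.Response(
                status_code=429,
                request=httpx.Request(method="POST", url="https://example.com"),
            )
        super().__init__(self.message, response=response, body=None)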
6 changes: 6 additions & 0 deletions tests/proxy_unit_tests/test_configs/test_bad_config.yaml
@@ -18,4 +18,10 @@ model_list:
       model: azure/azure-embedding-model
       api_base: os.environ/AZURE_API_BASE
       api_key: bad-key
+  - model_name: anthropic.claude-v2
+    litellm_params:
+      model: bedrock/anthropic.claude-v2
+      aws_access_key_id: bad-key
+      aws_secret_access_key: bad-key
+      aws_region_name: us-east-1

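Presumably the invalid AWS credentials in this entry are never exercised: the throttling test below patches llm_router.acompletion, so the entry only needs to register an anthropic.claude-v2 route on the proxy for the request to reach the mock.
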
62 changes: 62 additions & 0 deletions tests/proxy_unit_tests/test_proxy_exception_mapping.py
@@ -316,3 +316,65 @@ def test_chat_completion_exception_azure_context_window(mock_acompletion, client):

     except Exception as e:
         pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")
+
+
+bedrock_throttling_error_response_dict = {
+    "error": {
+        "message": "ThrottlingException - Rate exceeded",
+        "type": "throttling_error",
+        "param": None,
+        "code": 429,
+    },
+}
+bedrock_throttling_error_response = Response(
+    status_code=429,
+    content=json.dumps(bedrock_throttling_error_response_dict),
+)
+
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.acompletion",
+    return_value=bedrock_throttling_error_response,
+)
+def test_chat_completion_exception_bedrock_throttling(mock_acompletion, client):
+    try:
+        # Your test data
+        test_data = {
+            "model": "anthropic.claude-v2",
+            "messages": [
+                {"role": "user", "content": "hi"},
+            ],
+            "max_tokens": 10,
+        }
+        response = None
+
+        response = client.post("/chat/completions", json=test_data)
+        print("got response from server", response)
+
+        mock_acompletion.assert_called_once_with(
+            **test_data,
+            litellm_call_id=mock.ANY,
+            litellm_logging_obj=mock.ANY,
+            request_timeout=mock.ANY,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
+
+        json_response = response.json()
+
+        print("keys in json response", json_response.keys())
+
+        assert json_response.keys() == {"error"}
+
+        assert json_response == bedrock_throttling_error_response_dict
+
+        # make an openai client to call _make_status_error_from_response
+        openai_client = openai.OpenAI(api_key="anything")
+        openai_exception = openai_client._make_status_error_from_response(
+            response=response
+        )
+        print("exception from proxy", openai_exception)
+        assert isinstance(openai_exception, openai.RateLimitError)
+        print("passed exception is of type RateLimitError")
+
+    except Exception as e:
+        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")