33from collections .abc import AsyncIterator
44from contextlib import asynccontextmanager
55from datetime import datetime , timezone
6- from unittest .mock import AsyncMock , patch
76
87import pytest
9- from aiolimiter import AsyncLimiter
108from tenacity import AsyncRetrying , RetryError , retry_if_exception_type , stop_after_attempt , wait_fixed
9+ from throttled .asyncio import RateLimiterType , Throttled , rate_limiter , store
1110
1211from pydantic_ai .messages import (
1312 ModelMessage ,
async def test_rate_limited_model_limiter_only():
    """Test RateLimitedModel with only a rate limiter configured (no retryer).

    Uses a real Throttled GCRA limiter at 10 req/s (burst 10) so the test runs
    fast while still exercising the limiter code path for both ``request`` and
    ``request_stream``.
    """
    # Create a simple model with rate limiting
    simple_model = SimpleModel()

    # Create a rate limiter - 10 requests per second to make tests fast
    throttle = Throttled(
        using=RateLimiterType.GCRA.value,
        quota=rate_limiter.per_sec(10, burst=10),
        store=store.MemoryStore(),
    )
    rate_limited_model = RateLimitedModel(simple_model, limiter=throttle)

    messages: list[ModelMessage] = [
        ModelRequest(
            parts=[
                UserPromptPart('user prompt with rate limiting'),
            ]
        ),
    ]

    # Make a request
    response = await rate_limited_model.request(
        messages,
        model_settings=None,
        model_request_parameters=ModelRequestParameters(
            function_tools=[],
            allow_text_output=True,
            output_tools=[],
        ),
    )

    # Check response is correct
    assert response.model_name == 'simple_model'
    assert len(response.parts) == 1
    assert isinstance(response.parts[0], TextPart)
    assert response.parts[0].content == 'simple response'

    # Now test with streaming
    async with rate_limited_model.request_stream(
        messages,
        model_settings=None,
        model_request_parameters=ModelRequestParameters(
            function_tools=[],
            allow_text_output=True,
            output_tools=[],
        ),
    ) as response_stream:
        events = [event async for event in response_stream]

    # Verify events: SimpleModel's stream yields a PartStartEvent followed by
    # one more event (see SimpleModel definition elsewhere in this file)
    assert len(events) == 2
    assert isinstance(events[0], PartStartEvent)
    assert isinstance(events[0].part, TextPart)
    assert events[0].part.content == 'stream part 1'
425421
async def test_rate_limited_model_both_limiter_and_retryer():
    """Test RateLimitedModel with both a rate limiter and a retryer configured.

    FailingModel raises ValueError twice before succeeding, so a retryer
    stopping after 3 attempts should end in success; attempt_count verifies
    the model was actually invoked once per attempt.
    """
    # Model that fails twice, then succeeds on the third attempt
    failing_model = FailingModel(fail_count=2)

    # Configure the limiter and retryer
    throttle = Throttled(
        using=RateLimiterType.GCRA.value,
        quota=rate_limiter.per_sec(10, burst=10),
        store=store.MemoryStore(),
    )
    retry_config = AsyncRetrying(
        retry=retry_if_exception_type(ValueError),
        stop=stop_after_attempt(3),
        wait=wait_fixed(0.1),
    )

    rate_limited_model = RateLimitedModel(failing_model, limiter=throttle, retryer=retry_config)

    messages: list[ModelMessage] = [
        ModelRequest(
            parts=[
                UserPromptPart('user prompt with rate limiting and retries'),
            ]
        ),
    ]

    # Make a request - should succeed after retries
    response = await rate_limited_model.request(
        messages,
        model_settings=None,
        model_request_parameters=ModelRequestParameters(
            function_tools=[],
            allow_text_output=True,
            output_tools=[],
        ),
    )

    # Check response is correct
    assert isinstance(response.parts[0], TextPart)
    assert response.parts[0].content == 'success after retries'

    # Verify the model was called the right number of times (2 failures + 1 success)
    assert failing_model.attempt_count == 3

    # Reset for streaming test
    failing_model.attempt_count = 0

    # Test with streaming - should also succeed after retries
    async with rate_limited_model.request_stream(
        messages,
        model_settings=None,
        model_request_parameters=ModelRequestParameters(
            function_tools=[],
            allow_text_output=True,
            output_tools=[],
        ),
    ) as response_stream:
        events = [event async for event in response_stream]

    # Verify events exist
    assert len(events) > 0

    # Verify the model was called the right number of times
    assert failing_model.attempt_count == 3
497488
async def test_rate_limited_model_concurrent_requests():
    """Test RateLimitedModel with concurrent requests."""
    import time

    # Create several simple model instances
    simple_model = SimpleModel()

    # Create a real rate limiter that will allow 2 requests per second
    throttle = Throttled(
        using=RateLimiterType.GCRA.value,
        quota=rate_limiter.per_sec(2),  # 2 requests per second
        store=store.MemoryStore(),
    )

    rate_limited_model = RateLimitedModel(simple_model, limiter=throttle)

    # Create the message for all requests
    messages: list[ModelMessage] = [
        ModelRequest(
            parts=[
                # NOTE(review): exact prompt text fell between diff hunks — confirm against the repo
                UserPromptPart('user prompt for concurrent requests'),
            ]
        ),
    ]

    # Make 5 sequential requests and measure time.
    # Use monotonic() rather than time(): wall-clock time can jump (NTP,
    # DST) and would make the elapsed-time assertions below flaky.
    start_time = time.monotonic()

    for _ in range(5):
        response = await rate_limited_model.request(
            messages,
            model_settings=None,
            model_request_parameters=ModelRequestParameters(
                function_tools=[],
                allow_text_output=True,
                output_tools=[],
            ),
        )
        assert response.model_name == 'simple_model'

    total_time = time.monotonic() - start_time

    # With 2 requests per second and 5 requests total, with GCRA algorithm:
    # - First 2 requests go immediately (burst allowed)
    # - Wait ~0.5s, next 2 requests go
    # - Wait ~0.5s, last request goes
    # Total should be at least 1 second
    assert total_time >= 1.0, f'Expected at least 1 second for 5 requests at 2/sec, but took {total_time}s'

    # But it shouldn't take too much longer (allow some margin for processing)
    assert total_time < 2.5, f'Expected less than 2.5 seconds for 5 requests at 2/sec, but took {total_time}s'
557541
558542async def test_rate_limited_model_neither_limiter_nor_retryer ():
0 commit comments