Skip to content

Commit 9288d9a

Browse files
committed
Not in this branch
1 parent eb4f5fc commit 9288d9a

File tree

1 file changed

+48
-0
lines changed

1 file changed

+48
-0
lines changed

tests/unit/embeddings/test_sentence_transformers.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,51 @@ def test_embed_query_non_retryable_error_handling(mock_import: Mock) -> None:
7676

7777
# Verify the model was called only once (no retries for non-rate-limit errors)
7878
assert mock_model.encode.call_count == 1
79+
80+
81+
@patch("builtins.__import__")
def test_embed_query_rate_limit_error_retries(mock_import: Mock) -> None:
    """Check that a persistent rate-limit failure is retried, then surfaces as RetryError."""
    fake_module = get_mock_sentence_transformers()
    mock_import.return_value = fake_module
    mock_model = fake_module.SentenceTransformer.return_value

    # Every encode() call fails with a message matching the "too many requests"
    # rate-limit pattern. A distinct exception object is built per attempt so
    # that no instance is shared between retries.
    mock_model.encode.side_effect = [
        Exception("too many requests - please wait") for _ in range(3)
    ]

    embedder = SentenceTransformerEmbeddings()

    # Once all attempts are used up, the retry layer (tenacity) is expected to
    # raise RetryError rather than the underlying exception.
    with pytest.raises(RetryError):
        embedder.embed_query("test query")

    # Three calls in total: per the original author's note, this is the default
    # max_attempts of RetryRateLimitHandler.
    assert mock_model.encode.call_count == 3
104+
105+
106+
@patch("builtins.__import__")
def test_embed_query_rate_limit_error_eventual_success(mock_import: Mock) -> None:
    """Check that embed_query returns normally when a retry finally succeeds."""
    fake_module = get_mock_sentence_transformers()
    mock_import.return_value = fake_module
    mock_model = fake_module.SentenceTransformer.return_value

    # Two rate-limit failures, followed by a successful encode() on the third call.
    rate_limit_failures = [
        Exception("too many requests - please wait"),
        Exception("too many requests - please wait"),
    ]
    successful_output = np.array([[0.1, 0.2, 0.3]])
    mock_model.encode.side_effect = [*rate_limit_failures, successful_output]

    embedder = SentenceTransformerEmbeddings()
    embedding = embedder.embed_query("test query")

    # The embedding from the successful third attempt is what the caller receives.
    assert embedding == [0.1, 0.2, 0.3]
    # Two failed attempts plus the successful one.
    assert mock_model.encode.call_count == 3

0 commit comments

Comments
 (0)