@@ -76,3 +76,51 @@ def test_embed_query_non_retryable_error_handling(mock_import: Mock) -> None:
7676
7777 # Verify the model was called only once (no retries for non-rate-limit errors)
7878 assert mock_model .encode .call_count == 1
79+
80+
@patch("builtins.__import__")
def test_embed_query_rate_limit_error_retries(mock_import: Mock) -> None:
    """Check that a persistent rate-limit failure is retried, then surfaced.

    Every scripted ``encode`` call raises an exception whose message matches
    the "too many requests" pattern. The test expects ``embed_query`` to end
    with ``RetryError`` once ``encode`` has been invoked exactly three times.
    """
    fake_module = get_mock_sentence_transformers()
    mock_import.return_value = fake_module
    encoder = fake_module.SentenceTransformer.return_value

    # Build a distinct exception object for each attempt so that no instance
    # is shared between retries.
    encoder.encode.side_effect = [
        Exception("too many requests - please wait") for _ in range(3)
    ]

    embeddings = SentenceTransformerEmbeddings()

    # Once the retry budget is used up, tenacity reports the failure as a
    # RetryError instead of re-raising the underlying exception.
    with pytest.raises(RetryError):
        embeddings.embed_query("test query")

    # Three calls corresponds to the default max_attempts of
    # RetryRateLimitHandler.
    assert encoder.encode.call_count == 3
104+
105+
@patch("builtins.__import__")
def test_embed_query_rate_limit_error_eventual_success(mock_import: Mock) -> None:
    """Check that ``embed_query`` recovers when a retry finally succeeds.

    ``encode`` is scripted to raise a rate-limit style exception twice and
    then return a single-row embedding array. The test expects that row back
    as a plain list, with ``encode`` having been called three times in total.
    """
    fake_module = get_mock_sentence_transformers()
    mock_import.return_value = fake_module
    encoder = fake_module.SentenceTransformer.return_value

    # Scripted outcomes: two rate-limit failures followed by one good batch.
    rate_limited = [
        Exception("too many requests - please wait") for _ in range(2)
    ]
    embedding_batch = np.array([[0.1, 0.2, 0.3]])
    encoder.encode.side_effect = [*rate_limited, embedding_batch]

    embedding = SentenceTransformerEmbeddings().embed_query("test query")

    # The successful third attempt supplies the returned vector.
    assert embedding == [0.1, 0.2, 0.3]
    # Two failed attempts plus the successful one.
    assert encoder.encode.call_count == 3
0 commit comments