Skip to content

Commit

Permalink
Spike out synchronously rate limited OpenAI model
Browse files Browse the repository at this point in the history
  • Loading branch information
anticorrelator committed Oct 27, 2023
1 parent 6bdfaa6 commit 5f669bc
Showing 1 changed file with 34 additions and 8 deletions.
42 changes: 34 additions & 8 deletions src/phoenix/experimental/evals/models/openai.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import logging
import os
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union

import requests
from openai.openai_object import OpenAIObject

from phoenix.experimental.evals.models.base import BaseEvalModel
Expand Down Expand Up @@ -34,6 +35,28 @@ def openai_token_cost(chat_completion: OpenAIObject) -> Numeric:
return chat_completion.usage.total_tokens


def openai_rate_limit_info(model_name: str, api_key: str) -> Mapping[str, int]:
    """Fetch the account's OpenAI rate limits for ``model_name``.

    Issues a minimal (billable) chat-completion request solely to read the
    ``x-ratelimit-*`` headers that OpenAI attaches to the HTTP response.

    Args:
        model_name: Name of the OpenAI chat model to probe.
        api_key: OpenAI API key placed in the ``Authorization`` header.

    Returns:
        Mapping with integer values for ``"request-limit"`` and
        ``"token-limit"``, taken from the response headers.

    Raises:
        requests.HTTPError: If the probe request fails (e.g. bad key/model).
        KeyError: If the response lacks the rate-limit headers.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello"},
        ],
        # We only need the response headers, so keep the probe as cheap
        # as possible by requesting a single completion token.
        "max_tokens": 1,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=10,  # never hang indefinitely on a blocking network call
    )
    # Surface auth/model errors clearly instead of a KeyError below.
    response.raise_for_status()
    limit_info = {
        "request-limit": int(response.headers["x-ratelimit-limit-requests"]),
        "token-limit": int(response.headers["x-ratelimit-limit-tokens"]),
    }
    return limit_info


@dataclass
class OpenAIModel(BaseEvalModel):
openai_api_type: Optional[str] = field(default=None)
Expand Down Expand Up @@ -82,11 +105,13 @@ def __post_init__(self) -> None:
@property
def rate_limiter(self) -> OpenAIRateLimiter:
    """Lazily build (and cache) the rate limiter for this model.

    On first access, queries the OpenAI API for the account's actual
    request/token limits (see ``openai_rate_limit_info``) instead of
    hard-coded defaults, then configures the limiter with them.
    """
    if self._rate_limiter is None:
        self._rate_limiter = OpenAIRateLimiter()
        # NOTE(review): this performs a (billable) network call on first
        # property access — confirm that is acceptable for callers.
        limit_info = openai_rate_limit_info(self.model_name, self.openai_api_key)
        self._rate_limiter.set_rate_limits(
            self.model_name,
            request_rate_limit=limit_info["request-limit"],
            token_rate_limit=limit_info["token-limit"],
        )
    return self._rate_limiter

def _init_environment(self) -> None:
Expand Down Expand Up @@ -197,7 +222,8 @@ def _generate_with_retry(self, **kwargs: Any) -> Any:
]

def metered_openai_completion(**kwargs: Any) -> Any:
    """Run a completion call through the per-model rate limiter.

    Wraps ``self._openai.Completion.create`` with the limiter returned by
    ``self.rate_limiter.limit(...)`` so token usage (via
    ``openai_token_cost``) counts against the model's rate budget.
    """
    limit = self.rate_limiter.limit(self.model_name, openai_token_cost)
    response = limit(self._openai.Completion.create)(**kwargs)
    return response

@self.retry(
Expand All @@ -213,8 +239,8 @@ def _completion_with_retry(**kwargs: Any) -> Any:
(message.get("content") or "")
for message in (kwargs.pop("messages", None) or ())
)
return self._openai.Completion.create(**kwargs)
return self._openai.ChatCompletion.create(**kwargs)
return metered_openai_completion(**kwargs)
return metered_openai_completion(**kwargs)

return _completion_with_retry(**kwargs)

Expand Down

0 comments on commit 5f669bc

Please sign in to comment.