Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

switch openai to newer cheaper models by default #39

Merged
merged 4 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/chap/backends/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def make_full_query(self, messages: Session, max_query_size: int) -> str:
continue
result.append(formats[m.role].format(content))
full_query = "".join(result)
print("fq", full_query)
return full_query

async def aask(
Expand Down Expand Up @@ -102,5 +101,10 @@ async def aask(


def factory() -> Backend:
    """Uses the llama.cpp completion web API

    Note: Consider using the openai-chatgpt backend with a custom URL instead.
    The llama.cpp server will automatically apply common chat templates with the
    openai-chatgpt backend, while chat templates must be manually configured client side
    with this backend."""
    backend = LlamaCpp()
    return backend
14 changes: 11 additions & 3 deletions src/chap/backends/openai_chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,20 @@ def from_model(cls, model: str) -> "EncodingMeta":
class ChatGPT:
@dataclass
class Parameters:
model: str = "gpt-4-turbo"
"""The model to use. The most common alternative value is 'gpt-3.5-turbo'."""
model: str = "gpt-4o-mini"
"""The model to use. The most common alternative value is 'gpt-4o'."""

max_request_tokens: int = 1024
"""The approximate greatest number of tokens to send in a request. When the session is long, the system prompt and 1 or more of the most recent interaction steps are sent."""

url: str = "https://api.openai.com/v1/chat/completions"
"""The URL of a chatgpt-pcompatible server's completion endpoint."""
"""The URL of a chatgpt-compatible server's completion endpoint. Notably, llama.cpp's server is compatible with this backend, and can automatically apply common chat templates too."""

temperature: float | None = None
"""The model temperature for sampling"""

top_p: float | None = None
"""The model temperature for sampling"""

def __init__(self) -> None:
self.parameters = self.Parameters()
Expand Down Expand Up @@ -135,6 +141,8 @@ async def aask(
headers={"authorization": f"Bearer {self.get_key()}"},
json={
"model": self.parameters.model,
"temperature": self.parameters.temperature,
"top_p": self.parameters.top_p,
"stream": True,
"messages": session_to_list(full_prompt),
},
Expand Down