chore: add temperature to LLMConfig (#735)
Co-authored-by: Charles Packer <packercharles@gmail.com>
kl2806 and cpacker authored Jan 23, 2025
1 parent 4e29dac commit 4a4e62a
Showing 4 changed files with 12 additions and 3 deletions.
2 changes: 2 additions & 0 deletions letta/llm_api/llm_api_tools.py
@@ -237,6 +237,7 @@ def create(
             data=dict(
                 contents=[m.to_google_ai_dict() for m in messages],
                 tools=tools,
+                generation_config={"temperature": llm_config.temperature},
             ),
             inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )
@@ -261,6 +262,7 @@ def create(
             # user=str(user_id),
             # NOTE: max_tokens is required for Anthropic API
             max_tokens=1024,  # TODO make dynamic
+            temperature=llm_config.temperature,
         ),
     )
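With this hunk, the config-level temperature reaches both the Google AI and the Anthropic request payloads. Below is a minimal, self-contained sketch of the resulting payload shapes; the helper names and the `Config` stand-in are illustrative, not letta code — only the nesting mirrors the diff above.

```python
# Hedged sketch of the payload shapes after this change; helper names and the
# Config stand-in are illustrative, not letta APIs.
from dataclasses import dataclass


@dataclass
class Config:
    temperature: float = 0.7


def google_payload(cfg: Config, contents: list) -> dict:
    # Google AI nests sampling parameters under "generation_config".
    return {"contents": contents, "generation_config": {"temperature": cfg.temperature}}


def anthropic_payload(cfg: Config, messages: list) -> dict:
    # Anthropic takes temperature at the top level; max_tokens is required.
    return {"messages": messages, "max_tokens": 1024, "temperature": cfg.temperature}


print(google_payload(Config(), []))
print(anthropic_payload(Config(temperature=0.2), []))
```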

6 changes: 4 additions & 2 deletions letta/llm_api/openai.py
@@ -125,7 +125,8 @@ def build_openai_chat_completions_request(
             tools=[Tool(type="function", function=f) for f in functions] if functions else None,
             tool_choice=tool_choice,
             user=str(user_id),
-            max_tokens=max_tokens,
+            max_completion_tokens=max_tokens,
+            temperature=llm_config.temperature,
         )
     else:
         data = ChatCompletionRequest(
@@ -134,7 +135,8 @@ def build_openai_chat_completions_request(
             functions=functions,
             function_call=function_call,
             user=str(user_id),
-            max_tokens=max_tokens,
+            max_completion_tokens=max_tokens,
+            temperature=llm_config.temperature,
         )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
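Note the incidental rename alongside the temperature plumbing: OpenAI's Chat Completions API has deprecated `max_tokens` in favor of `max_completion_tokens`, so the request now carries the newer parameter name. A hedged sketch of the resulting request body (model and values are examples, not letta defaults):

```python
# Illustrative Chat Completions request body after this change.
request_body = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "Hello"}],
    "max_completion_tokens": 512,  # replaces the deprecated max_tokens
    "temperature": 0.7,            # now sourced from llm_config.temperature
}
```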
5 changes: 5 additions & 0 deletions letta/schemas/llm_config.py
@@ -14,6 +14,7 @@ class LLMConfig(BaseModel):
         model_wrapper (str): The wrapper for the model. This is used to wrap additional text around the input/output of the model. This is useful for text-to-text completions, such as the Completions API in OpenAI.
         context_window (int): The context window size for the model.
         put_inner_thoughts_in_kwargs (bool): Puts `inner_thoughts` as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
+        temperature (float): The temperature to use when generating text with the model. A higher temperature will result in more random text.
     """

     # TODO: 🤮 don't default to a vendor! bug city!
@@ -46,6 +47,10 @@ class LLMConfig(BaseModel):
         description="Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.",
     )
     handle: Optional[str] = Field(None, description="The handle for this config, in the format provider/model-name.")
+    temperature: float = Field(
+        0.7,
+        description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
+    )

     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
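The schema change in isolation: a pydantic `Field` with a 0.7 default, so existing configs that never mention temperature keep working unchanged. A minimal stand-alone sketch (`SamplingConfig` is a stand-in, not the real LLMConfig):

```python
# Minimal stand-in mirroring the new field; not the full LLMConfig.
from pydantic import BaseModel, ConfigDict, Field


class SamplingConfig(BaseModel):
    temperature: float = Field(
        0.7,
        description="The temperature to use when generating text with the model.",
    )
    model_config = ConfigDict(protected_namespaces=())


print(SamplingConfig().temperature)                 # 0.7 — the new default
print(SamplingConfig(temperature=0.0).temperature)  # near-deterministic sampling
```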
2 changes: 1 addition & 1 deletion letta/schemas/openai/chat_completions.py
@@ -104,7 +104,7 @@ class ChatCompletionRequest(BaseModel):
     logit_bias: Optional[Dict[str, int]] = None
     logprobs: Optional[bool] = False
     top_logprobs: Optional[int] = None
-    max_tokens: Optional[int] = None
+    max_completion_tokens: Optional[int] = None
     n: Optional[int] = 1
     presence_penalty: Optional[float] = 0
     response_format: Optional[ResponseFormat] = None
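Because `ChatCompletionRequest` is a pydantic model, the field rename changes what gets serialized onto the wire. A hedged sketch with a trimmed stand-in (this assumes serialization via pydantic v2's `model_dump`, the usual path, not necessarily letta's exact call site):

```python
# Trimmed stand-in for ChatCompletionRequest showing the serialized output;
# assumes pydantic v2's model_dump, not necessarily letta's exact call site.
from typing import Optional

from pydantic import BaseModel


class MiniRequest(BaseModel):
    max_completion_tokens: Optional[int] = None
    temperature: Optional[float] = None


print(MiniRequest(max_completion_tokens=256, temperature=0.7).model_dump(exclude_none=True))
# -> {'max_completion_tokens': 256, 'temperature': 0.7}
```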
