diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index f4b1ac1d71840..577b87af34195 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -887,6 +887,7 @@ async def oai_chat_completions(user_prompt, base_path, async_client, debug=False, + temperature=None, model=None, n_predict=None, enable_streaming=None, @@ -913,7 +914,8 @@ async def oai_chat_completions(user_prompt, "model": model, "max_tokens": n_predict, "stream": enable_streaming, - "seed": seed + "temperature": temperature if temperature is not None else 0.0, + "seed": seed, } if response_format is not None: payload['response_format'] = response_format @@ -978,7 +980,8 @@ async def oai_chat_completions(user_prompt, max_tokens=n_predict, stream=enable_streaming, response_format=payload.get('response_format'), - seed=seed + seed=seed, + temperature=payload['temperature'] ) except openai.error.AuthenticationError as e: if expect_api_error is not None and expect_api_error: diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index d872b63f537f4..d8a2286e4b1df 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -371,7 +371,7 @@ static json oaicompat_completion_params_parse( llama_params["presence_penalty"] = json_value(body, "presence_penalty", 0.0); llama_params["seed"] = json_value(body, "seed", LLAMA_DEFAULT_SEED); llama_params["stream"] = json_value(body, "stream", false); - llama_params["temperature"] = json_value(body, "temperature", 0.0); + llama_params["temperature"] = json_value(body, "temperature", 1.0); llama_params["top_p"] = json_value(body, "top_p", 1.0); // Apply chat template to the list of messages