From 413f9379a4c4aced78aeda65eb65c7ef19b730ce Mon Sep 17 00:00:00 2001 From: blefo Date: Fri, 14 Nov 2025 10:32:55 +0100 Subject: [PATCH 1/2] feat: update system instructions and reduce temperature for web search tests --- tests/e2e/test_chat_completions.py | 4 ++-- tests/e2e/test_chat_completions_http.py | 4 ++-- tests/e2e/test_responses.py | 4 ++-- tests/e2e/test_responses_http.py | 5 ++--- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/e2e/test_chat_completions.py b/tests/e2e/test_chat_completions.py index 72b491f9..8093bcde 100644 --- a/tests/e2e/test_chat_completions.py +++ b/tests/e2e/test_chat_completions.py @@ -801,7 +801,7 @@ def test_web_search(client, model, high_web_search_rate_limit): messages=[ { "role": "system", - "content": "You are a helpful assistant that provides accurate and up-to-date information.", + "content": "You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", }, { "role": "user", @@ -809,7 +809,7 @@ def test_web_search(client, model, high_web_search_rate_limit): }, ], extra_body={"web_search": True}, - temperature=0.2, + temperature=0.01, max_tokens=150, ) diff --git a/tests/e2e/test_chat_completions_http.py b/tests/e2e/test_chat_completions_http.py index 32538f0b..f3111288 100644 --- a/tests/e2e/test_chat_completions_http.py +++ b/tests/e2e/test_chat_completions_http.py @@ -908,7 +908,7 @@ def test_web_search(client, model, high_web_search_rate_limit): "messages": [ { "role": "system", - "content": "You are a helpful assistant that provides accurate and up-to-date information.", + "content": "You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", }, { "role": "user", @@ -916,7 +916,7 @@ def test_web_search(client, model, high_web_search_rate_limit): }, ], "extra_body": {"web_search": True}, - "temperature": 0.2, + "temperature": 0.01, "max_tokens": 150, } diff --git a/tests/e2e/test_responses.py b/tests/e2e/test_responses.py index f5f931c5..33b725cf 100644 --- a/tests/e2e/test_responses.py +++ b/tests/e2e/test_responses.py @@ -709,9 +709,9 @@ def test_web_search(client, model, high_web_search_rate_limit): response = client.responses.create( model=model, input="Who won the Roland Garros Open in 2024? Just reply with the winner's name.", - instructions="You are a helpful assistant that provides accurate and up-to-date information.", + instructions="You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", extra_body={"web_search": True}, - temperature=0.2, + temperature=0.01, ) assert response is not None, "Response should not be None" diff --git a/tests/e2e/test_responses_http.py b/tests/e2e/test_responses_http.py index 03db990a..d3073276 100644 --- a/tests/e2e/test_responses_http.py +++ b/tests/e2e/test_responses_http.py @@ -813,12 +813,11 @@ def test_web_search(client, model, high_web_search_rate_limit): "model": model, "input": "Who won the Roland Garros Open in 2024? Just reply with the winner's name.", "instructions": "You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", - "temperature": 0.2, - "max_output_tokens": 15000, "extra_body": {"web_search": True}, + "temperature": 0.01, } - response = client.post("/responses", json=payload, timeout=180) + response = client.post("/responses", json=payload, timeout=60) assert response.status_code == 200, ( f"Response for {model} failed with status {response.status_code}" ) From 31f09d008889aa825a9d3f01324d808fab2596e3 Mon Sep 17 00:00:00 2001 From: Baptiste Lefort <71429321+blefo@users.noreply.github.com> Date: Fri, 14 Nov 2025 12:53:05 +0100 Subject: [PATCH 2/2] Increase timeout for /responses post request --- tests/e2e/test_responses_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_responses_http.py b/tests/e2e/test_responses_http.py index d3073276..fe5fea33 100644 --- a/tests/e2e/test_responses_http.py +++ b/tests/e2e/test_responses_http.py @@ -817,7 +817,7 @@ def test_web_search(client, model, high_web_search_rate_limit): "temperature": 0.01, } - response = client.post("/responses", json=payload, timeout=60) + response = client.post("/responses", json=payload, timeout=180) assert response.status_code == 200, ( f"Response for {model} failed with status {response.status_code}" )