From 37fb3b60b0eab5213125003ead374e64d2001915 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Wed, 12 Feb 2025 17:23:38 +0000
Subject: [PATCH] Fix failing FP8 cpu offload test

Signed-off-by: mgoin
---
 tests/quantization/test_cpu_offload.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/quantization/test_cpu_offload.py b/tests/quantization/test_cpu_offload.py
index 29a5721ef364..de03d37a74bf 100644
--- a/tests/quantization/test_cpu_offload.py
+++ b/tests/quantization/test_cpu_offload.py
@@ -1,5 +1,5 @@
-# SPDX-License-Identifier: Apache-2.0 
-
+# SPDX-License-Identifier: Apache-2.0
+
 # Expanded quantized model tests for CPU offloading
 # Base tests: tests/basic_correctness/test_cpu_offload.py
 
@@ -14,13 +14,13 @@
                     reason="fp8 is not supported on this GPU type.")
 def test_cpu_offload_fp8():
     # Test quantization of an unquantized checkpoint
-    compare_two_settings("meta-llama/Meta-Llama-3-8B-Instruct",
+    compare_two_settings("meta-llama/Llama-3.2-1B-Instruct",
                          ["--quantization", "fp8"],
-                         ["--quantization", "fp8", "--cpu-offload-gb", "2"],
+                         ["--quantization", "fp8", "--cpu-offload-gb", "1"],
                          max_wait_seconds=480)
 
     # Test loading a quantized checkpoint
-    compare_two_settings("neuralmagic/Meta-Llama-3-8B-Instruct-FP8", [],
-                         ["--cpu-offload-gb", "2"],
+    compare_two_settings("neuralmagic/Qwen2-1.5B-Instruct-FP8", [],
+                         ["--cpu-offload-gb", "1"],
                          max_wait_seconds=480)
 
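
To verify the fix locally, the updated test can be run on an FP8-capable GPU
from a vLLM development checkout. The invocation below is a suggestion, not
part of the patch; it assumes a standard pytest setup for the repo:

    pytest tests/quantization/test_cpu_offload.py -k test_cpu_offload_fp8

The change itself swaps in smaller checkpoints (Llama-3.2-1B-Instruct and
Qwen2-1.5B-Instruct-FP8 in place of the 8B Llama-3 models) and lowers
--cpu-offload-gb from 2 to 1, presumably to reduce the memory and runtime
the test needs in CI.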