Skip to content

Commit db2b1c7

Browse files
committed
Smaller model for e2e fusion test
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
1 parent bcd95b5 commit db2b1c7

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

tests/compile/test_fusions_e2e.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -37,11 +37,12 @@ class ModelBackendTestCase(NamedTuple):
37 37
if current_platform.is_cuda():
38 38
MODELS_FP8 = [
39 39
ModelBackendTestCase(
40-
model_name="nvidia/Llama-4-Scout-17B-16E-Instruct-FP8",
40+
# Use smaller model for L40s in CI
41+
model_name="RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
41 42
model_kwargs=dict(max_model_len=1024),
42 43
backend=_Backend.TRITON_ATTN,
43-
attention_fusions=48,
44-
allreduce_fusions=96,
44+
attention_fusions=32,
45+
allreduce_fusions=65,
45 46
),
46 47
ModelBackendTestCase(
47 48
model_name="nvidia/Llama-4-Scout-17B-16E-Instruct-FP8",

0 commit comments

Comments
 (0)