1 parent bcd95b5 commit db2b1c7
tests/compile/test_fusions_e2e.py
@@ -37,11 +37,12 @@ class ModelBackendTestCase(NamedTuple):
 if current_platform.is_cuda():
     MODELS_FP8 = [
         ModelBackendTestCase(
-            model_name="nvidia/Llama-4-Scout-17B-16E-Instruct-FP8",
+            # Use smaller model for L40s in CI
+            model_name="RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
             model_kwargs=dict(max_model_len=1024),
             backend=_Backend.TRITON_ATTN,
-            attention_fusions=48,
-            allreduce_fusions=96,
+            attention_fusions=32,
+            allreduce_fusions=65,
         ),
         ModelBackendTestCase(
             model_name="nvidia/Llama-4-Scout-17B-16E-Instruct-FP8",
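For context, the sketch below shows roughly what the ModelBackendTestCase NamedTuple named in the hunk header might look like, inferred only from the fields this diff touches; the actual definition in tests/compile/test_fusions_e2e.py may differ in types, defaults, and extra fields, and _Backend stands in for vLLM's attention-backend enum (this is a sketch under those assumptions, not the file's real code).

from enum import Enum
from typing import Any, NamedTuple

class _Backend(Enum):
    # Placeholder for the attention-backend enum referenced in the diff.
    TRITON_ATTN = "triton_attn"

class ModelBackendTestCase(NamedTuple):
    # Fields inferred from this diff; the real class may carry more.
    model_name: str               # HF model id, e.g. the FP8 checkpoints above
    model_kwargs: dict[str, Any]  # passed through to the model under test
    backend: _Backend             # attention backend, e.g. _Backend.TRITON_ATTN
    attention_fusions: int        # likely the expected count of fused attention ops
    allreduce_fusions: int        # likely the expected count of fused allreduce ops

The new expected counts appear to track the smaller model's depth: Meta-Llama-3.1-8B has 32 decoder layers versus 48 for Llama-4-Scout, which is consistent with attention_fusions dropping from 48 to 32 and allreduce_fusions from 96 to 65, though the exact per-layer accounting is not spelled out in this diff.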