vllm-project · mgoin · Oct 13, 2025 · Oct 13, 2025 · Oct 13, 2025
@@ -15,7 +15,6 @@
 # Avoid combinatorial explosion with full Cartesian product
 NUM_TOKENS_HIDDEN_SIZES = [
     *[(1, i) for i in [1, 64, *VEC_HIDDEN_SIZES, 5120, 5137]],
-    *[(83, i) for i in [1, 1033, 2048, 5120]],
     *[(2048, i) for i in [1, 64, *VEC_HIDDEN_SIZES, 5137]],
     *[(4096, i) for i in [1, 64, 5137]],
 ]

@@ -11,19 +11,7 @@
 
 DTYPES = [torch.half, torch.bfloat16, torch.float]
 NUM_TOKENS = [7, 83, 4096]  # Arbitrary values for testing
-HIDDEN_SIZES = [
-    8,
-    768,
-    769,
-    770,
-    771,
-    5120,
-    5124,
-    5125,
-    5126,
-    8192,
-    8199,
-]  # Arbitrary values for testing
+HIDDEN_SIZES = [8, 768, 769, 5120, 5125, 8192]  # Arbitrary values for testing
 ADD_RESIDUAL = [False, True]
 SEEDS = [0]
 CUDA_DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
@@ -118,7 +106,7 @@ def test_poly_norm(
 @pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
 @pytest.mark.parametrize("add_residual", ADD_RESIDUAL)
 @pytest.mark.parametrize("dtype", DTYPES)
-@pytest.mark.parametrize("quant_scale", [1.0, 0.01, 10.0])
+@pytest.mark.parametrize("quant_scale", [0.01, 1.0, 10.0])
 @pytest.mark.parametrize("seed", SEEDS)
 @pytest.mark.parametrize("device", CUDA_DEVICES)
 @pytest.mark.parametrize("strided_input", [False, True])

@@ -9,7 +9,7 @@
 
 
 @pytest.mark.parametrize("shape", [(1, 512), (544, 4096), (67, 8192)])
-@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
 def test_permute_cols(shape, dtype):
     x = torch.randn(shape, dtype=dtype).cuda()
     perm = torch.randperm(x.shape[1]).to(torch.int).cuda()

@@ -12,8 +12,8 @@
 from vllm.platforms import current_platform
 
 IS_NEOX_STYLE = [True, False]
-DTYPES = [torch.half, torch.bfloat16, torch.float]
-HEAD_SIZES = [64, 80, 112, 120, 256]
+DTYPES = [torch.bfloat16, torch.float]
+HEAD_SIZES = [64, 80, 120, 256]
 ROTARY_DIMS = [None, 32]  # None means rotary dim == head size
 NUM_HEADS = [17]  # Arbitrary values for testing
 BATCH_SIZES = [5]  # Arbitrary values for testing