Skip to content

Commit fed1f44

Browse files
committed
[wip] enable 3d weights for NVFP4Tensor
Summary: This doesn't work yet, stay tuned. It is needed for vLLM, which stitches 2d weights into a 3d weight for MoEs. Test Plan: Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: 9f4b94d ghstack-comment-id: 3357908175 Pull-Request: #3109
1 parent 8335b31 commit fed1f44

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

test/prototype/mx_formats/test_inference_workflow.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,13 @@ def test_narrow_similar_to_vllm(self):
218218
gemm_kernel_choice=MXGemmKernelChoice.EMULATED,
219219
)
220220
self._test_narrow_similar_to_vllm(config)
221+
222+
# TODO(next): make this test pass by enabling 3d NVFP4Tensor, currently a lot
223+
# of places hardcode 2d
224+
def test_nvfp4_quantize_3d_param_similar_to_vllm(self):
    """Run the shared 3d-parameter quantization check with an NVFP4 config.

    The config is weight-only, with the triton kernel and the dynamic
    per-tensor scale both turned off. The actual check is performed by
    ``self._test_quantize_3d_param_similar_to_vllm``.

    Per the accompanying TODO, this test is not expected to pass until
    NVFP4Tensor supports 3d weights.
    """
    nvfp4_weight_only_config = NVFP4InferenceConfig(
        mm_config=NVFP4MMConfig.WEIGHT_ONLY,
        use_triton_kernel=False,
        use_dynamic_per_tensor_scale=False,
    )
    self._test_quantize_3d_param_similar_to_vllm(nvfp4_weight_only_config)

torchao/testing/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,18 @@ def _test_narrow_similar_to_vllm(self, config: AOBaseConfig):
625625
f"shape mismatch: {orig_attr.shape} vs {new_attr.shape}"
626626
)
627627

628+
def _test_quantize_3d_param_similar_to_vllm(self, config: AOBaseConfig):
    """Quantize a ``torch.nn.Linear`` whose weight has been swapped for a 3d tensor.

    This mirrors what happens when vLLM loads empty MoE weights and then
    quantizes them.

    Args:
        config: the quantization config handed to ``quantize_``.
    """
    factory_kwargs = {"device": "cuda", "dtype": torch.bfloat16}

    # NOTE(review): the explicit device="cuda" in factory_kwargs takes
    # precedence over the surrounding torch.device("meta") context, so these
    # tensors are really allocated on CUDA -- confirm which one is intended.
    with torch.device("meta"):
        linear = torch.nn.Linear(1024, 1024, **factory_kwargs)
        # Replace the 2d weight with a 3d one; the leading dimension is
        # presumably the number of stacked experts -- TODO confirm.
        stacked_weight = torch.randn(60, 2816, 2048, **factory_kwargs)
        linear.weight = torch.nn.Parameter(stacked_weight)

    quantize_(linear, config)
639+
628640

629641
common_utils.instantiate_parametrized_tests(TorchAOBasicTestCase)
630642
common_utils.instantiate_parametrized_tests(TorchAOCompileTestCase)

0 commit comments

Comments
 (0)