Commit 65e9611

make mxtensor printing nicer
Summary: Fix printing of a linear weight wrapped with MXTensor.

Test Plan: Quantize a Qwen MoE model with mxfp4 and print it. The old version would print the raw data for each weight; the new version prints this:

```python
(self_attn): Qwen2MoeSdpaAttention(
  (q_proj): Linear(in_features=2048, out_features=2048, weight=MXTensor(self._elem_dtype=torch.float4_e2m1fn_x2, self._block_size=32, torch.bfloat16, MXGemmKernelChoice.EMULATED, self.act_quant_kwargs=QuantizeTensorToMXKwargs(elem_dtype=torch.float4_e2m1fn_x2, block_size=32, scaling_mode=<ScaleCalculationMode.FLOOR: 'floor'>, gemm_kernel_choice=<MXGemmKernelChoice.EMULATED: 'emulated'>, pack_fp6=False)))
```

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: fc9e992
ghstack-comment-id: 3336009172
Pull Request resolved: #3068
1 parent f109317 commit 65e9611
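
To reproduce the test plan above on a smaller scale, something like the following should work; the import paths match the files touched by this commit, but the toy model, the default `MXFPInferenceConfig()` arguments, and the CUDA requirement are assumptions rather than part of the commit.

```python
import torch
import torch.nn as nn

from torchao.quantization import quantize_
from torchao.prototype.mx_formats.inference_workflow import MXFPInferenceConfig

# Toy stand-in for a transformer projection layer (shapes and names are illustrative).
model = nn.Sequential(nn.Linear(2048, 2048, bias=False)).to(torch.bfloat16).cuda()

# Quantize every Linear weight to an MX format; default config arguments assumed here.
quantize_(model, MXFPInferenceConfig())

# With this commit, printing the model shows a compact per-layer summary like
#   Linear(in_features=2048, out_features=2048, weight=MXTensor(self._elem_dtype=..., ...))
# instead of dumping the raw quantized weight data.
print(model)
```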

File tree

2 files changed: +5 −1 lines changed


torchao/prototype/mx_formats/inference_workflow.py

Lines changed: 2 additions & 1 deletion
@@ -24,6 +24,7 @@
     QuantizeTensorToNVFP4Kwargs,
     per_tensor_amax_to_scale,
 )
+from torchao.quantization.quant_api import _quantization_type
 from torchao.quantization.transform_module import (
     register_quantize_module_handler,
 )
@@ -89,7 +90,7 @@ def __post_init__(self):


 def _linear_extra_repr(self):
-    return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight={repr(self.weight)}"
+    return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight={_quantization_type(self.weight)}"


 @register_quantize_module_handler(MXFPInferenceConfig)
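
For context, `_quantization_type` (imported above from `torchao.quantization.quant_api`) dispatches to the weight's tensor subclass, and `_linear_extra_repr` stands in for `nn.Linear.extra_repr` on quantized modules. A minimal sketch of that pattern follows; the fallback string and the `types.MethodType` binding are illustrative assumptions, not the exact torchao implementation.

```python
import types

import torch
import torch.nn as nn


def _quantization_type(weight: torch.Tensor) -> str:
    # Simplified stand-in: tensor subclasses such as MXTensor expose a
    # _quantization_type() method; anything else falls back to its type name.
    if hasattr(weight, "_quantization_type"):
        return f"{weight.__class__.__name__}({weight._quantization_type()})"
    return weight.__class__.__name__


def _linear_extra_repr(self) -> str:
    # Replacement for nn.Linear.extra_repr: shape info plus a compact weight
    # summary instead of the full quantized tensor data.
    return (
        f"in_features={self.weight.shape[1]}, "
        f"out_features={self.weight.shape[0]}, "
        f"weight={_quantization_type(self.weight)}"
    )


def _attach_extra_repr(module: nn.Linear) -> nn.Linear:
    # Hypothetical helper: bind the custom extra_repr to this instance so
    # print(model) uses it for the quantized layer.
    module.extra_repr = types.MethodType(_linear_extra_repr, module)
    return module
```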

torchao/prototype/mx_formats/mx_tensor.py

Lines changed: 3 additions & 0 deletions
@@ -544,6 +544,9 @@ def __repr__(self):
         # TODO better elem dtype print for fp4
         return f"MXTensor: elem_dtype: {self._elem_dtype}, s_e8m0: {self._scale_e8m0}, d: {self.qdata}, act_quant_kwargs: {self.act_quant_kwargs}"  # noqa: E501

+    def _quantization_type(self):
+        return f"{self._elem_dtype=}, {self._block_size=}, {self._orig_dtype=}, {self._gemm_kernel_choice=}, {self.act_quant_kwargs=}"
+
     @classmethod
     def __torch_dispatch__(cls, func, types, args, kwargs=None):
         # avoid circular dependency
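
One small note on the new `_quantization_type` output: the `=` specifier in an f-string expands to both the expression text and its value, which is why the printed repr in the test plan contains `self._elem_dtype=...` and `self.act_quant_kwargs=...`. A tiny standalone illustration with made-up values:

```python
block_size = 32
elem_dtype = "torch.float4_e2m1fn_x2"

# f"{expr=}" renders as "expr=<repr of value>", mirroring the MXTensor output above.
print(f"{block_size=}, {elem_dtype=}")
# block_size=32, elem_dtype='torch.float4_e2m1fn_x2'
```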
