
Commit f48e988

Rename Int4WeightPreshuffledFakeQuantizeConfig
**Summary:** This config actually works for both preshuffled and plain int4 QAT, so we remove "Preshuffled" from the name.

BC-breaking notes:

```
Int4WeightPreshuffledFakeQuantizeConfig -> Int4WeightFakeQuantizeConfig
Int4WeightPreshuffledFakeQuantizer -> Int4WeightFakeQuantizer
```

**Test Plan:**

```
python test/quantization/test_qat.py
```
1 parent ea8c00f commit f48e988
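For downstream code, the rename is mechanical. A minimal before/after sketch of the import change, using the module paths that appear in the diffs below (illustrative only, not necessarily the whole affected import surface):

```python
# Before this commit:
# from torchao.quantization.qat.fake_quantize_config import (
#     Int4WeightPreshuffledFakeQuantizeConfig,
# )
# from torchao.quantization.qat.fake_quantizer import (
#     Int4WeightPreshuffledFakeQuantizer,
# )

# After this commit, the same classes under their new names:
from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
)
from torchao.quantization.qat.fake_quantizer import (
    Int4WeightFakeQuantizer,
)
```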

3 files changed: +14 −16 lines changed

test/quantization/test_qat.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -50,7 +50,7 @@
 )
 from torchao.quantization.qat.fake_quantize_config import (
     Float8FakeQuantizeConfig,
-    Int4WeightPreshuffledFakeQuantizeConfig,
+    Int4WeightFakeQuantizeConfig,
     IntxFakeQuantizeConfig,
 )
 from torchao.quantization.qat.fake_quantizer import (
@@ -1985,7 +1985,7 @@ def test_infer_fp8_int4_config(self):
         self.assertIsInstance(act_config, Float8FakeQuantizeConfig)
         self.assertEqual(act_config.dtype, e4m3_dtype)
         self.assertIsInstance(act_config.granularity, PerRow)
-        self.assertIsInstance(weight_config, Int4WeightPreshuffledFakeQuantizeConfig)
+        self.assertIsInstance(weight_config, Int4WeightFakeQuantizeConfig)
         self.assertEqual(weight_config.group_size, 128)
         self.assertEqual(weight_config.activation_dtype, e4m3_dtype)
@@ -2008,7 +2008,7 @@ def test_infer_int4_weight_only_config(self):
         base_config = Int4WeightOnlyConfig(version=2)
         (act_config, weight_config) = _infer_fake_quantize_configs(base_config)
         self.assertIsNone(act_config)
-        self.assertIsInstance(weight_config, Int4WeightPreshuffledFakeQuantizeConfig)
+        self.assertIsInstance(weight_config, Int4WeightFakeQuantizeConfig)
         self.assertEqual(weight_config.group_size, 128)
         self.assertEqual(weight_config.activation_dtype, torch.bfloat16)
@@ -2102,7 +2102,7 @@ def test_fbgemm_fp8_int4_preshuffled_primitives(self):
         """
         Compare numerics between:
         (1) fbgemm_gpu.experimental.gen_ai.quantize.quantize_int4_preshuffle
-        (2) Our reference QAT version in `Int4WeightPreshuffledFakeQuantizer`
+        (2) Our reference QAT version in `Int4WeightFakeQuantizer`
         """
         from fbgemm_gpu.experimental.gen_ai.quantize import (
             int4_row_quantize,
@@ -2184,7 +2184,7 @@ def test_fbgemm_int4_weight_only_primitives(self):
         """
         Compare numerics between:
         (1) fbgemm_gpu.experimental.gen_ai.quantize.int4_row_quantize_zp
-        (2) Our reference QAT version in `Int4WeightPreshuffledFakeQuantizer`
+        (2) Our reference QAT version in `Int4WeightFakeQuantizer`
         """
         from fbgemm_gpu.experimental.gen_ai.quantize import (
             int4_row_quantize_zp,
```
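The two `test_infer_*` hunks above pin down what config inference returns for the renamed class. A condensed sketch of the weight-only case, with names and values taken from the diff (`_infer_fake_quantize_configs` is a private helper in `fake_quantize_config.py`, so treat this as illustrative rather than a stable API):

```python
import torch

from torchao.quantization import Int4WeightOnlyConfig
from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
    _infer_fake_quantize_configs,
)

# Weight-only int4 (version=2): no activation fake quantization,
# int4 weights fake-quantized against bf16 activations.
base_config = Int4WeightOnlyConfig(version=2)
act_config, weight_config = _infer_fake_quantize_configs(base_config)
assert act_config is None
assert isinstance(weight_config, Int4WeightFakeQuantizeConfig)
assert weight_config.group_size == 128
assert weight_config.activation_dtype == torch.bfloat16
```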

torchao/quantization/qat/fake_quantize_config.py

Lines changed: 3 additions & 4 deletions
```diff
@@ -78,9 +78,8 @@ def __post_init__(self):
             )


-# TODO: rename this config, it actually works for both plain and preshuffled
 @dataclass
-class Int4WeightPreshuffledFakeQuantizeConfig(FakeQuantizeConfigBase):
+class Int4WeightFakeQuantizeConfig(FakeQuantizeConfigBase):
     """
     Config for pint4 weight fake quantization that targets the numerics in the following preshuffled kernel:
         torch.ops.fbgemm.f8i4bf16_shuffled
@@ -393,7 +392,7 @@ def _infer_fake_quantize_configs(
             raise ValueError(
                 f"Packing format must be one of {supported_packing_formats}"
             )
-        weight_config = Int4WeightPreshuffledFakeQuantizeConfig(
+        weight_config = Int4WeightFakeQuantizeConfig(
             group_size=128,
             activation_dtype=torch.bfloat16,
         )
@@ -436,7 +435,7 @@ def _infer_fake_quantize_configs(
             dtype=e4m3_dtype,
             granularity=PerRow(),
         )
-        weight_config = Int4WeightPreshuffledFakeQuantizeConfig(
+        weight_config = Int4WeightFakeQuantizeConfig(
             group_size=128,
             activation_dtype=e4m3_dtype,
         )
```
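Both inference branches in this file now construct the renamed config; the only difference between the plain int4 path and the fp8-int4 path is the activation dtype. A small sketch, assuming `e4m3_dtype` resolves to `torch.float8_e4m3fn` as it does on CUDA builds of torchao:

```python
import torch

from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
)

# Plain int4 path (bf16 activations), as built in the first branch above:
bf16_act_config = Int4WeightFakeQuantizeConfig(
    group_size=128,
    activation_dtype=torch.bfloat16,
)

# fp8-int4 path (e4m3 activations), as built in the second branch above:
fp8_act_config = Int4WeightFakeQuantizeConfig(
    group_size=128,
    activation_dtype=torch.float8_e4m3fn,
)
```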

torchao/quantization/qat/fake_quantizer.py

Lines changed: 6 additions & 7 deletions
```diff
@@ -35,7 +35,7 @@
 from .fake_quantize_config import (
     FakeQuantizeConfigBase,
     Float8FakeQuantizeConfig,
-    Int4WeightPreshuffledFakeQuantizeConfig,
+    Int4WeightFakeQuantizeConfig,
     IntxFakeQuantizeConfig,
 )
 from .utils import (
@@ -68,8 +68,8 @@ def from_config(config: FakeQuantizeConfigBase) -> "FakeQuantizerBase":

         if isinstance(config, IntxFakeQuantizeConfig):
             return IntxFakeQuantizer(config)
-        elif isinstance(config, Int4WeightPreshuffledFakeQuantizeConfig):
-            return Int4WeightPreshuffledFakeQuantizer(config)
+        elif isinstance(config, Int4WeightFakeQuantizeConfig):
+            return Int4WeightFakeQuantizer(config)
         elif isinstance(config, Float8FakeQuantizeConfig):
             return Float8FakeQuantizer(config)
         elif isinstance(config, NVFP4FakeQuantizeConfig):
@@ -103,8 +103,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return dq


-# TODO: rename this, it also works for plain Int4Tensor
-class Int4WeightPreshuffledFakeQuantizer(FakeQuantizerBase):
+class Int4WeightFakeQuantizer(FakeQuantizerBase):
     """
     Generic module for applying int4 fake quantization to a weight tensor,
     targeting the following FBGEMM kernels:
@@ -113,11 +112,11 @@ class Int4WeightPreshuffledFakeQuantizer(FakeQuantizerBase):
         torch.ops.fbgemm.bf16i4bf16_rowwise
     """

-    def __init__(self, config: Int4WeightPreshuffledFakeQuantizeConfig):
+    def __init__(self, config: Int4WeightFakeQuantizeConfig):
         super().__init__()
         self.config = config
         torch._C._log_api_usage_once(
-            "torchao.quantization.qat.Int4WeightPreshuffledFakeQuantizer"
+            "torchao.quantization.qat.Int4WeightFakeQuantizer"
         )

     def forward(self, w: torch.Tensor) -> torch.Tensor:
```
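Because `FakeQuantizerBase.from_config` dispatches on the config type, callers generally never name the quantizer class directly. A usage sketch under the same assumptions as the earlier examples (the weight shape is arbitrary, chosen so `group_size=128` divides the inner dimension; not verified end-to-end on every backend):

```python
import torch

from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
)
from torchao.quantization.qat.fake_quantizer import FakeQuantizerBase

config = Int4WeightFakeQuantizeConfig(
    group_size=128,
    activation_dtype=torch.bfloat16,
)

# Dispatches on the config type and returns the renamed Int4WeightFakeQuantizer.
fake_quantizer = FakeQuantizerBase.from_config(config)

w = torch.randn(256, 512, dtype=torch.bfloat16)
w_fq = fake_quantizer(w)  # fake-quantized weight, same shape as w
```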
