Commit 58c3064

Rename Int4WeightPreshuffledFakeQuantizeConfig (#3005)
**Summary:** This config actually works for both preshuffled and plain int4 QAT, so we remove "Preshuffled" from the name.

BC-breaking notes:

```
Int4WeightPreshuffledFakeQuantizeConfig -> Int4WeightFakeQuantizeConfig
Int4WeightPreshuffledFakeQuantizer -> Int4WeightFakeQuantizer
```

**Test Plan:**

```
python test/quantization/test_qat.py
```
1 parent 9a770a5 commit 58c3064

3 files changed: 14 additions, 18 deletions
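Since the rename ships as BC-breaking (per the note above, with no mention of a deprecation alias), downstream code needs a one-line update. A minimal migration sketch, using the import paths that appear in the test diff below:

```python
# Old names (removed by this commit):
#   from torchao.quantization.qat.fake_quantize_config import (
#       Int4WeightPreshuffledFakeQuantizeConfig,
#   )
#   from torchao.quantization.qat.fake_quantizer import (
#       Int4WeightPreshuffledFakeQuantizer,
#   )

# New names:
from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
)
from torchao.quantization.qat.fake_quantizer import (
    Int4WeightFakeQuantizer,
)
```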

test/quantization/test_qat.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -50,7 +50,7 @@
 )
 from torchao.quantization.qat.fake_quantize_config import (
     Float8FakeQuantizeConfig,
-    Int4WeightPreshuffledFakeQuantizeConfig,
+    Int4WeightFakeQuantizeConfig,
     IntxFakeQuantizeConfig,
 )
 from torchao.quantization.qat.fake_quantizer import (
@@ -2049,7 +2049,7 @@ def test_infer_fp8_int4_config(self):
         self.assertIsInstance(act_config, Float8FakeQuantizeConfig)
         self.assertEqual(act_config.dtype, e4m3_dtype)
         self.assertIsInstance(act_config.granularity, PerRow)
-        self.assertIsInstance(weight_config, Int4WeightPreshuffledFakeQuantizeConfig)
+        self.assertIsInstance(weight_config, Int4WeightFakeQuantizeConfig)
         self.assertEqual(weight_config.group_size, 128)
         self.assertEqual(weight_config.activation_dtype, e4m3_dtype)
 
@@ -2072,7 +2072,7 @@ def test_infer_int4_weight_only_config(self):
         base_config = Int4WeightOnlyConfig(version=2)
         (act_config, weight_config) = _infer_fake_quantize_configs(base_config)
         self.assertIsNone(act_config)
-        self.assertIsInstance(weight_config, Int4WeightPreshuffledFakeQuantizeConfig)
+        self.assertIsInstance(weight_config, Int4WeightFakeQuantizeConfig)
         self.assertEqual(weight_config.group_size, 128)
         self.assertEqual(weight_config.activation_dtype, torch.bfloat16)
 
@@ -2166,7 +2166,7 @@ def test_fbgemm_fp8_int4_preshuffled_primitives(self):
         """
         Compare numerics between:
         (1) fbgemm_gpu.experimental.gen_ai.quantize.quantize_int4_preshuffle
-        (2) Our reference QAT version in `Int4WeightPreshuffledFakeQuantizer`
+        (2) Our reference QAT version in `Int4WeightFakeQuantizer`
         """
         from fbgemm_gpu.experimental.gen_ai.quantize import (
             int4_row_quantize,
@@ -2248,7 +2248,7 @@ def test_fbgemm_int4_weight_only_primitives(self):
         """
         Compare numerics between:
         (1) fbgemm_gpu.experimental.gen_ai.quantize.int4_row_quantize_zp
-        (2) Our reference QAT version in `Int4WeightPreshuffledFakeQuantizer`
+        (2) Our reference QAT version in `Int4WeightFakeQuantizer`
         """
         from fbgemm_gpu.experimental.gen_ai.quantize import (
             int4_row_quantize_zp,
```
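The assertions above encode the intended mapping from base configs to QAT fake-quantize configs. A condensed sketch of the weight-only case; note that `_infer_fake_quantize_configs` is a private helper and the `Int4WeightOnlyConfig` import path is an assumption here:

```python
import torch
from torchao.quantization import Int4WeightOnlyConfig  # assumed import path
from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
    _infer_fake_quantize_configs,  # private helper, subject to change
)

# int4 weight-only (version=2) infers: no activation fake quantization,
# int4 weight fake quantization with bf16 activations and group size 128
act_config, weight_config = _infer_fake_quantize_configs(
    Int4WeightOnlyConfig(version=2)
)
assert act_config is None
assert isinstance(weight_config, Int4WeightFakeQuantizeConfig)
assert weight_config.group_size == 128
assert weight_config.activation_dtype == torch.bfloat16
```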

torchao/quantization/qat/fake_quantize_config.py

Lines changed: 3 additions & 4 deletions
```diff
@@ -78,9 +78,8 @@ def __post_init__(self):
             )
 
 
-# TODO: rename this config, it actually works for both plain and preshuffled
 @dataclass
-class Int4WeightPreshuffledFakeQuantizeConfig(FakeQuantizeConfigBase):
+class Int4WeightFakeQuantizeConfig(FakeQuantizeConfigBase):
     """
     Config for int4 weight fake quantization that targets the numerics in the following preshuffled kernel:
         torch.ops.fbgemm.f8i4bf16_shuffled
@@ -395,7 +394,7 @@ def _infer_fake_quantize_configs(
             raise ValueError(
                 f"Packing format must be one of {supported_packing_formats}"
             )
-        weight_config = Int4WeightPreshuffledFakeQuantizeConfig(
+        weight_config = Int4WeightFakeQuantizeConfig(
            group_size=128,
            activation_dtype=torch.bfloat16,
        )
@@ -438,7 +437,7 @@ def _infer_fake_quantize_configs(
             dtype=e4m3_dtype,
             granularity=PerRow(),
         )
-        weight_config = Int4WeightPreshuffledFakeQuantizeConfig(
+        weight_config = Int4WeightFakeQuantizeConfig(
             group_size=128,
             activation_dtype=e4m3_dtype,
         )
```
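The renamed config keeps its fields unchanged; a minimal construction sketch for the two inferred variants visible in this diff (`e4m3_dtype` in the surrounding code corresponds to `torch.float8_e4m3fn`, per the quantizer's forward below):

```python
import torch
from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
)

# Plain int4 path: bf16 activations (inferred from Int4WeightOnlyConfig)
bf16_weight_config = Int4WeightFakeQuantizeConfig(
    group_size=128,
    activation_dtype=torch.bfloat16,
)

# fp8-int4 path: e4m3 float8 activations
fp8_weight_config = Int4WeightFakeQuantizeConfig(
    group_size=128,
    activation_dtype=torch.float8_e4m3fn,
)
```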

torchao/quantization/qat/fake_quantizer.py

Lines changed: 6 additions & 9 deletions
```diff
@@ -35,7 +35,7 @@
 from .fake_quantize_config import (
     FakeQuantizeConfigBase,
     Float8FakeQuantizeConfig,
-    Int4WeightPreshuffledFakeQuantizeConfig,
+    Int4WeightFakeQuantizeConfig,
     IntxFakeQuantizeConfig,
 )
 from .utils import (
@@ -68,8 +68,8 @@ def from_config(config: FakeQuantizeConfigBase) -> "FakeQuantizerBase":
 
         if isinstance(config, IntxFakeQuantizeConfig):
             return IntxFakeQuantizer(config)
-        elif isinstance(config, Int4WeightPreshuffledFakeQuantizeConfig):
-            return Int4WeightPreshuffledFakeQuantizer(config)
+        elif isinstance(config, Int4WeightFakeQuantizeConfig):
+            return Int4WeightFakeQuantizer(config)
         elif isinstance(config, Float8FakeQuantizeConfig):
             return Float8FakeQuantizer(config)
         elif isinstance(config, NVFP4FakeQuantizeConfig):
@@ -103,8 +103,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return dq
 
 
-# TODO: rename this, it also works for plain Int4Tensor
-class Int4WeightPreshuffledFakeQuantizer(FakeQuantizerBase):
+class Int4WeightFakeQuantizer(FakeQuantizerBase):
     """
     Generic module for applying int4 fake quantization to a weight tensor,
     targeting the following FBGEMM kernels:
@@ -113,12 +112,10 @@ class Int4WeightPreshuffledFakeQuantizer(FakeQuantizerBase):
         torch.ops.fbgemm.bf16i4bf16_rowwise
     """
 
-    def __init__(self, config: Int4WeightPreshuffledFakeQuantizeConfig):
+    def __init__(self, config: Int4WeightFakeQuantizeConfig):
         super().__init__()
         self.config = config
-        torch._C._log_api_usage_once(
-            "torchao.quantization.qat.Int4WeightPreshuffledFakeQuantizer"
-        )
+        torch._C._log_api_usage_once("torchao.quantization.qat.Int4WeightFakeQuantizer")
 
     def forward(self, w: torch.Tensor) -> torch.Tensor:
         if self.config.activation_dtype == torch.float8_e4m3fn:
```
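End to end, the renamed quantizer is reached via the `FakeQuantizerBase.from_config` dispatch shown above. A usage sketch; the tensor shape is illustrative, and whether the reference forward runs on CPU or needs the FBGEMM CUDA kernels is not established by this diff:

```python
import torch
from torchao.quantization.qat.fake_quantize_config import (
    Int4WeightFakeQuantizeConfig,
)
from torchao.quantization.qat.fake_quantizer import (
    FakeQuantizerBase,
    Int4WeightFakeQuantizer,
)

config = Int4WeightFakeQuantizeConfig(
    group_size=128,
    activation_dtype=torch.bfloat16,
)

# from_config dispatches on the config type (see the diff above)
fake_quantizer = FakeQuantizerBase.from_config(config)
assert isinstance(fake_quantizer, Int4WeightFakeQuantizer)

# Fake quantization of a weight tensor preserves its shape
w = torch.randn(256, 512, dtype=torch.bfloat16)  # illustrative shape
fake_w = fake_quantizer(w)
assert fake_w.shape == w.shape
```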
