Skip to content

Commit ca55348

Browse files
committed
Addressed MR comments
Signed-off-by: Kinjal Patel <kinjalpravin@nvidia.com>
1 parent 91837c3 commit ca55348

File tree

3 files changed

+4
-1
lines changed

3 files changed

+4
-1
lines changed

modelopt/torch/quantization/model_calib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def sync_quantizer_amax_across_tp(
118118
# Syncing amax across TP for sequential quantizer
119119
if isinstance(quantizer, SequentialQuantizer):
120120
for _q in quantizer:
121-
"Syncing amax across TP for sequential quantizer"
121+
# Syncing amax across TP for sequential quantizer
122122
sync_quantizer_amax_across_tp(
123123
_q, linear_name, quantizer_type, axes_for_sync, parallel_state
124124
)

modelopt/torch/quantization/plugins/transformer_engine.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def _setup(self):
7373
# GroupedMLP stores the weights as weight0, weight1, etc. To run setup in order to
7474
# initialize the quantizer states, self.weight is used to extract shape, dtype etc. Assigning
7575
# self.weight0 to self.weight to run the quantizer states initialization.
76+
assert not hasattr(self, "weight"), "self.weight should not exist for TEGroupedLinear"
7677
self.weight = self.weight0
7778
# Memorize the original weight.dtype for modelopt_post_restore given that
7879
# the dtype can change later.
@@ -84,6 +85,7 @@ def modelopt_post_restore(self, prefix: str = ""):
8485
# GroupedMLP stores the weights as weight0, weight1, etc. To run post_restore in order to
8586
# initialize the quantizer states, self.weight is used to extract shape, dtype etc. Assigning
8687
# self.weight0 to self.weight to run the quantizer states initialization.
88+
assert not hasattr(self, "weight"), "self.weight should not exist for TEGroupedLinear"
8789
self.weight = self.weight0
8890
super().modelopt_post_restore(prefix=prefix)
8991
# Remove self.weight after post_restore.

modelopt/torch/quantization/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ def is_quantized_linear(module):
253253
and hasattr(module, "weight_quantizer")
254254
and (
255255
(getattr(module, "weight", None) is not None and module.weight.dim() == 2)
256+
# module.weight0 check is required to support TEGroupedLinear
256257
or (getattr(module, "weight0", None) is not None and module.weight0.dim() == 2)
257258
)
258259
)

0 commit comments

Comments (0)