@@ -163,17 +163,24 @@ def _get_quantizer(self, group_idx: int) -> Optional[Quantizer]:
     def torchao_convert(self, model: nn.Module, weight_only: bool = False) -> None:
         """Converts model parameters to torchao quantized tensor subclasses."""
         model.eval()
-        self.restore_latent_params()
 
         # TODO(lvj): find more robust way to identify embedding layers
         embed_data_ptrs = set()
         linear_data_ptrs = set()
+        embed_modules = []
         for module in model.modules():
             if isinstance(module, nn.Embedding):
+                embed_modules.append(module)
                 embed_data_ptrs.add(module.weight.data_ptr())
             elif _is_linear(module) and module.weight.data_ptr() not in embed_data_ptrs:
                 linear_data_ptrs.add(module.weight.data_ptr())
 
+        tied_embeddings = getattr(model, "_tied_weights_keys", None) is not None
+        if tied_embeddings:
+            # Workaround for dynamic activations on tied embeddings
+            for module in embed_modules:
+                setattr(module, "bias", None)
+
         filter_fns = []
         configs = []
         attach_hf_config = _is_hf_model(model)
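For context, here is a minimal sketch of the situation the new tied_embeddings branch targets. It uses plain PyTorch only; the module names are illustrative and not from this repository, and the comments about the classification loop restate what the hunk above does rather than library guarantees.

# Minimal sketch (assumption: plain PyTorch; names are illustrative).
# When an nn.Embedding weight is tied to the output projection, both modules
# share one storage, so they report the same data_ptr() that the loop above
# uses to classify parameters.
import torch.nn as nn

embed = nn.Embedding(100, 16)
lm_head = nn.Linear(16, 100, bias=False)
lm_head.weight = embed.weight  # weight tying, as many causal LM heads do

assert embed.weight.data_ptr() == lm_head.weight.data_ptr()
# The shared tensor is recorded in embed_data_ptrs and therefore skipped when
# building linear_data_ptrs, so without the tied_embeddings check the group
# would be treated as a plain embedding.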
@@ -194,7 +201,7 @@ def torchao_convert(self, model: nn.Module, weight_only: bool = False) -> None:
             any_embed = any(p.data_ptr() in embed_data_ptrs for p in group["params"])
             config = _get_config_from_quantizer(
                 quantizer,
-                weight_only or any_embed,
+                weight_only or (any_embed and not tied_embeddings),
                 device,
                 group["quant_bits"],
                 group.get("quant_block_size"),
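The second hunk only changes the boolean passed to _get_config_from_quantizer. A small sketch of the resulting routing follows; the assumption (inferred from the diff, not confirmed by it) is that a True second argument selects a weight-only config and False selects a dynamic-activation config.

# Sketch of the weight-only decision after this change (helper name is mine).
def use_weight_only(weight_only: bool, any_embed: bool, tied_embeddings: bool) -> bool:
    return weight_only or (any_embed and not tied_embeddings)

# Untied embedding groups stay weight-only; tied ones now follow the linear path.
print(use_weight_only(False, True, False))  # True  -> weight-only embedding
print(use_weight_only(False, True, True))   # False -> dynamic activations despite embedding
print(use_weight_only(True, False, True))   # True  -> the weight_only flag always wins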