@@ -122,7 +122,6 @@ def quantize(  # noqa C901
             Int8DynamicActivationIntxWeightConfig,
             quantize_,
         )
-        from torchao.utils import unwrap_tensor_subclass
 
         with torch.no_grad():
             # Computation dtype is fixed to fp32 in the implementation of quantize_, so
@@ -142,7 +141,6 @@ def quantize(  # noqa C901
                     ),
                 ),
             )
-            model = unwrap_tensor_subclass(model)
         if verbose:
             print("quantized model:", model)
         return model
@@ -156,7 +154,6 @@ def quantize(  # noqa C901
             quantize_,
         )
         from torchao.quantization.granularity import PerGroup
-        from torchao.utils import unwrap_tensor_subclass
 
         def filter_fn(m, fqn):
             is_linear = isinstance(m, nn.Linear)
@@ -181,8 +178,6 @@ def filter_fn(m, fqn):
             filter_fn=filter_fn,
         )
 
-        model = unwrap_tensor_subclass(model)
-
         # TODO: deal with checkpoint / computation dtype decoupling.
 
         if verbose:
@@ -191,7 +186,6 @@ def filter_fn(m, fqn):
     elif qmode == "4w":
         from torchao.quantization.granularity import PerGroup
         from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-        from torchao.utils import unwrap_tensor_subclass
 
         q_group_size = 256 if group_size is None else group_size
         q_config = IntxWeightOnlyConfig(
@@ -204,7 +198,6 @@ def filter_fn(m, fqn):
             ),
         )
         quantize_(model, q_config)
-        model = unwrap_tensor_subclass(model)
 
         return model
     else:
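The net effect of the change is easiest to see in the "4w" branch: `quantize_` is now the last quantization step, with no `unwrap_tensor_subclass` pass after it. Below is a minimal sketch under that reading; the toy model, the `torch.int4` weight dtype, and the exact `IntxWeightOnlyConfig` keyword names are assumptions based on recent torchao, not code from this repository.

```python
# Hedged sketch of the "4w" flow after this diff (not the repository's code).
# Assumes torchao's IntxWeightOnlyConfig accepts weight_dtype/granularity and
# that quantize_ mutates the module in place, leaving nothing to unwrap.
import torch
import torch.nn as nn

from torchao.quantization.granularity import PerGroup
from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_

model = nn.Sequential(nn.Linear(256, 256))  # placeholder for the real model
q_group_size = 256  # the branch above defaults to 256 when group_size is None

q_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int4,  # 4-bit weight-only, per the "4w" qmode
    granularity=PerGroup(q_group_size),
)
quantize_(model, q_config)  # in place; previously followed by unwrap_tensor_subclass
```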