openvinotoolkit · AlexanderDokuchaev · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025
@@ -146,7 +146,6 @@ quantized_model = nncf.quantize(model, calibration_dataset)
 import nncf
 import torch.fx
 from torchvision import datasets, models
-from nncf.torch import disable_patching
 
 # Instantiate your uncompressed model
 model = models.mobilenet_v2()
@@ -165,15 +164,13 @@ calibration_dataset = nncf.Dataset(dataset_loader, transform_fn)
 
 # Step 3: Export model to TorchFX
 input_shape = (1, 3, 224, 224)
-with nncf.torch.disable_patching():
-    fx_model = torch.export.export_for_training(model, args=(ex_input,)).module()
-    # or
-    # fx_model = torch.export.export(model, args=(ex_input,)).module()
+fx_model = torch.export.export_for_training(model, args=(ex_input,)).module()
+# or
+# fx_model = torch.export.export(model, args=(ex_input,)).module()
 
-    # Step 4: Run the quantization pipeline
-    quantized_fx_model = nncf.quantize(fx_model, calibration_dataset)
-
- ```
+# Step 4: Run the quantization pipeline
+quantized_fx_model = nncf.quantize(fx_model, calibration_dataset)
+```
 
 </details>
 <details><summary><b>TensorFlow</b></summary>

@@ -33,7 +33,6 @@
 import nncf.torch
 from nncf.common.utils.helpers import create_table
 from nncf.common.utils.os import is_windows
-from nncf.torch import disable_patching
 
 IMAGE_SIZE = 64
 
@@ -193,12 +192,11 @@ def transform_fn(data_item):
     input_shape = (1, 3, IMAGE_SIZE, IMAGE_SIZE)
     example_input = torch.ones(*input_shape).to(device)
 
-    with disable_patching():
-        fx_model = torch.export.export_for_training(model.eval(), args=(example_input,)).module()
-        quantized_fx_model = nncf.quantize(fx_model, quantization_dataset)
-        quantized_fx_model = torch.compile(quantized_fx_model, backend="openvino")
+    fx_model = torch.export.export_for_training(model.eval(), args=(example_input,)).module()
+    quantized_fx_model = nncf.quantize(fx_model, quantization_dataset)
+    quantized_fx_model = torch.compile(quantized_fx_model, backend="openvino")
 
-        acc1_int8 = validate(val_loader, quantized_fx_model, device)
+    acc1_int8 = validate(val_loader, quantized_fx_model, device)
 
     print(f"Accuracy@1 of INT8 model: {acc1_int8:.3f}")
     print(f"Accuracy diff FP32 - INT8: {acc1_fp32 - acc1_int8:.3f}")
@@ -207,29 +205,26 @@ def transform_fn(data_item):
     # Step 3: Run benchmarks
     print(os.linesep + "[Step 3] Run benchmarks")
     print("Benchmark FP32 model compiled with default backend ...")
-    with disable_patching():
-        compiled_model = torch.compile(model)
-        try:
-            fp32_latency = measure_latency(compiled_model, example_inputs=example_input)
-        except BackendCompilerFailed as exp:
-            if not is_windows():
-                raise exp
-            print(
-                "WARNING: Torch Inductor is currently unavailable on Windows. "
-                "For more information, visit https://github.com/pytorch/pytorch/issues/135954"
-            )
-            fp32_latency = float("nan")
+    compiled_model = torch.compile(model)
+    try:
+        fp32_latency = measure_latency(compiled_model, example_inputs=example_input)
+    except BackendCompilerFailed as exp:
+        if not is_windows():
+            raise exp
+        print(
+            "WARNING: Torch Inductor is currently unavailable on Windows. "
+            "For more information, visit https://github.com/pytorch/pytorch/issues/135954"
+        )
+        fp32_latency = float("nan")
     print(f"{fp32_latency:.3f} ms")
 
     print("Benchmark FP32 model compiled with openvino backend ...")
-    with disable_patching():
-        compiled_model = torch.compile(model, backend="openvino")
-        fp32_ov_latency = measure_latency(compiled_model, example_inputs=example_input)
+    compiled_model = torch.compile(model, backend="openvino")
+    fp32_ov_latency = measure_latency(compiled_model, example_inputs=example_input)
     print(f"{fp32_ov_latency:.3f} ms")
 
     print("Benchmark INT8 model compiled with openvino backend ...")
-    with disable_patching():
-        int8_latency = measure_latency(quantized_fx_model, example_inputs=example_input)
+    int8_latency = measure_latency(quantized_fx_model, example_inputs=example_input)
     print(f"{int8_latency:.3f} ms")
 
     print("[Step 4] Summary:")

@@ -18,7 +18,6 @@
 import torch
 from torchvision import models
 
-from nncf.torch import disable_patching
 from tests.post_training.pipelines.base import FX_BACKENDS
 from tests.post_training.pipelines.base import PT_BACKENDS
 from tests.post_training.pipelines.base import BackendType
@@ -81,11 +80,10 @@ def prepare_model(self) -> None:
 
         if self.backend in FX_BACKENDS:
             with torch.no_grad():
-                with disable_patching():
-                    if self.backend is BackendType.CUDA_FX_TORCH:
-                        model = model.cuda()
-                        self.dummy_tensor = self.dummy_tensor.cuda()
-                    self.model = self.model_params.export_fn(model, (self.dummy_tensor,))
+                if self.backend is BackendType.CUDA_FX_TORCH:
+                    model = model.cuda()
+                    self.dummy_tensor = self.dummy_tensor.cuda()
+                self.model = self.model_params.export_fn(model, (self.dummy_tensor,))
 
         elif self.backend in PT_BACKENDS:
             self.model = model
@@ -105,8 +103,7 @@ def prepare_model(self) -> None:
         elif self.backend in [BackendType.OV, BackendType.FP32]:
             with torch.no_grad():
                 if self.model_params.export_torch_before_ov_convert:
-                    with disable_patching():
-                        model = torch.export.export(model, (self.dummy_tensor,))
+                    model = torch.export.export(model, (self.dummy_tensor,))
                 self.model = ov.convert_model(model, example_input=self.dummy_tensor, input=self.input_size)
             self.input_name = list(inp.get_any_name() for inp in self.model.inputs)[0]
 
@@ -120,12 +117,11 @@ def prepare_model(self) -> None:
     def _dump_model_fp32(self) -> None:
         """Dump IRs of fp32 models, to help debugging."""
         if self.backend in PT_BACKENDS:
-            with disable_patching():
-                ov_model = ov.convert_model(
-                    self.model,
-                    example_input=self.dummy_tensor,
-                    input=self.input_size,
-                )
+            ov_model = ov.convert_model(
+                self.model,
+                example_input=self.dummy_tensor,
+                input=self.input_size,
+            )
             ov.serialize(ov_model, self.fp32_model_dir / "model_fp32.xml")
 
         if self.backend in FX_BACKENDS: