
Commit 3f3b2fd

Merge branch 'kaihui/ar_v02_3x' of https://github.com/intel/neural-compressor into kaihui/ar_v02_3x

Kaihui-intel committed May 23, 2024
2 parents 6be70fa + 78af942
Showing 2 changed files with 7 additions and 7 deletions.
12 changes: 6 additions & 6 deletions neural_compressor/torch/algorithms/weight_only/autoround.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import json
import time
import copy
from typing import Union

import torch
@@ -135,6 +135,7 @@ def pack_model(
        set_module(compressed_model, k, new_module)
    return compressed_model


class InputCaptureModule(torch.nn.Module):

    def __init__(self, model) -> None:
@@ -154,6 +155,7 @@ def forward(self, *args, **kwargs):
            logger.error("Handle cases where input data is neither a Tensor nor a dict")
        return self.orig_model.forward(*args, **kwargs)


class AutoRoundQuantizer(Quantizer):
    def __init__(
        self,
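
For orientation, here is a minimal, self-contained reconstruction of what InputCaptureModule plausibly looks like, pieced together from the __init__ and forward fragments visible in this diff; the captured_inputs attribute name and the exact branch conditions are assumptions, not the repository's verified code:

import logging

import torch

logger = logging.getLogger(__name__)


class InputCaptureModule(torch.nn.Module):
    """Wraps a model and records the calibration inputs passed to forward."""

    def __init__(self, model) -> None:
        super().__init__()
        self.captured_inputs = []  # assumption: name of the cache attribute
        self.orig_model = model

    def forward(self, *args, **kwargs):
        if args and isinstance(args[0], torch.Tensor):
            self.captured_inputs.append(args[0])  # plain tensor batch
        elif kwargs:
            self.captured_inputs.append(kwargs)  # dict-style inputs (e.g. input_ids, attention_mask)
        else:
            logger.error("Handle cases where input data is neither a Tensor nor a dict")
        return self.orig_model.forward(*args, **kwargs)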
@@ -280,7 +282,7 @@ def prepare(self, model: torch.nn.Module, *args, **kwargs):
            data_type=self.data_type,
            scale_dtype=self.scale_dtype,
        )

        self.rounder.prepare()
        prepare_model = InputCaptureModule(model)
        return prepare_model
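
The prepare hunk above shows the quantizer handing back an InputCaptureModule rather than the original model. A rough sketch of how that flow would be driven, with the constructor arguments and the convert call hedged as assumptions about this in-progress API:

# Hypothetical calibration flow; AutoRoundQuantizer's exact signatures are assumptions.
quantizer = AutoRoundQuantizer(quant_config=quant_config)
capture_model = quantizer.prepare(model)       # wraps model in InputCaptureModule
for batch in calib_dataloader:                 # each forward pass records its inputs
    capture_model(batch)
q_model = quantizer.convert(capture_model)     # consumes the cached inputs and packs weights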
@@ -384,7 +386,7 @@ def get_autoround_default_run_fn(


class AutoRoundProcessor(AutoRound):

    @torch.no_grad()
    def cache_inter_data(self, block_names, n_samples, layer_names=[], last_cache_name=None):
        """Save the inputs of block_name for calibration. For layers, we cache both inputs and outputs.
@@ -431,8 +433,7 @@ def cache_inter_data(self, block_names, n_samples, layer_names=[], last_cache_name=None):
        self.model = self.model.to(tmp_dtype)

        return res



    @torch.no_grad()
    def prepare(self):
        """Prepares a given model for quantization."""
@@ -512,7 +513,6 @@ def convert(self):
        self.model = self.model.to("cpu")

        all_inputs = res


        del self.inputs
        inputs = all_inputs[self.block_names[0]]
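The cache_inter_data docstring above describes recording the inputs each block sees during calibration. A self-contained sketch of that general technique using standard PyTorch forward pre-hooks; this illustrates the idea only and is not the AutoRoundProcessor implementation:

import torch


def cache_block_inputs(model, block_names, calib_batches):
    """Record the positional tensor inputs seen by each named block."""
    cache = {name: [] for name in block_names}
    handles = []

    def make_hook(key):
        def hook(module, inputs):  # pre-hook: inputs is the tuple of positional args
            cache[key].append(tuple(x.detach() for x in inputs if torch.is_tensor(x)))
        return hook

    for name in block_names:
        handles.append(model.get_submodule(name).register_forward_pre_hook(make_hook(name)))
    with torch.no_grad():
        for batch in calib_batches:  # calibration forward passes fill the cache
            model(batch)
    for h in handles:
        h.remove()
    return cache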
2 changes: 1 addition & 1 deletion test/3x/torch/quantization/weight_only/test_autoround.py
@@ -63,7 +63,7 @@ def test_autoround(self, quant_lm_head):

        # prepare + convert API
        model = prepare(model=fp32_model, quant_config=quant_config)

        run_fn(model, *run_args)
        q_model = convert(model)
        out = q_model(self.inp)[0]
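
For readers unfamiliar with the 3.x API exercised here, a fleshed-out sketch of this test's prepare/convert sequence; the import path follows the repository's 3.x layout, while fp32_model, run_fn, run_args, and inp stand in for fixtures not shown in this hunk, and the AutoRoundConfig defaults are an assumption:

# Sketch of the test flow under the assumptions above.
from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare

quant_config = AutoRoundConfig()                               # weight-only AutoRound settings
model = prepare(model=fp32_model, quant_config=quant_config)   # returns the input-capture wrapper
run_fn(model, *run_args)                                       # calibration forward passes
q_model = convert(model)                                       # quantize using the captured inputs
out = q_model(inp)[0]                                          # inp: the test's sample input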
