
Commit 35181db

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 4fd03c2 commit 35181db
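pre-commit.ci runs the repository's configured hooks and commits whatever fixes they make. As a minimal local equivalent, sketched in Python (it simply shells out to the pre-commit CLI; this assumes pre-commit is installed and the repository ships a .pre-commit-config.yaml, which the commit message implies):

    # Sketch: reproduce pre-commit.ci's auto-fixes locally (assumes `pre-commit` is installed
    # and the repo defines .pre-commit-config.yaml).
    import subprocess

    # Run every configured hook against the whole working tree.
    # pre-commit exits non-zero when a hook fails or modifies files.
    result = subprocess.run(["pre-commit", "run", "--all-files"], check=False)
    print("hooks modified or flagged files" if result.returncode != 0 else "working tree already clean")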

7 files changed: 46 additions & 48 deletions
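The diffs below are mechanical style fixes of the kind hooks such as black and isort apply automatically: imports re-sorted and grouped, long call sites re-wrapped to the configured line length, two blank lines enforced between top-level definitions, and single quotes normalized to double quotes. A rough before/after sketch of the same pattern, using hypothetical names rather than code from this repository:

    # Before: unsorted imports, hand-wrapped call, single quotes (illustrative only).
    from typing import Union, List, Callable

    def describe(values, sep=', '):
        return 'values: {}'.format(
            sep.join(str(v) for v in values)
        )

    # After: roughly what black/isort-style hooks produce for the same code.
    from typing import Callable, List, Union


    def describe(values, sep=", "):
        return "values: {}".format(sep.join(str(v) for v in values))

    print(describe([1, 2, 3]))  # values: 1, 2, 3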

neural_compressor/onnxrt/algorithms/__init__.py

Lines changed: 1 addition & 7 deletions
@@ -19,10 +19,4 @@
 from neural_compressor.onnxrt.algorithms.weight_only.awq import apply_awq_on_model
 from neural_compressor.onnxrt.algorithms.layer_wise import layer_wise_quant
 
-__all__ = [
-    "Smoother",
-    "apply_rtn_on_model",
-    "apply_gptq_on_model",
-    "apply_awq_on_model",
-    "layer_wise_quant"
-]
+__all__ = ["Smoother", "apply_rtn_on_model", "apply_gptq_on_model", "apply_awq_on_model", "layer_wise_quant"]

neural_compressor/onnxrt/algorithms/layer_wise/core.py

Lines changed: 26 additions & 23 deletions
@@ -21,22 +21,23 @@
 import os
 from copy import deepcopy
 from pathlib import Path
-from typing import Union, Callable, List
+from typing import Callable, List, Union
 
 import onnx
 import onnxruntime as ort
 
+from neural_compressor.common import Logger
 from neural_compressor.onnxrt.quantization.calibrate import CalibrationDataReader
 from neural_compressor.onnxrt.utils.onnx_model import ONNXModel
 from neural_compressor.onnxrt.utils.utility import check_model_with_infer_shapes
-from neural_compressor.common import Logger
 
 logger = Logger().get_logger()
 
 __all__ = [
     "layer_wise_quant",
 ]
 
+
 def layer_wise_quant(
     model: Union[onnx.ModelProto, ONNXModel, Path, str],
     quant_func: Callable,
@@ -95,9 +96,7 @@ def layer_wise_quant(
         )
     )
     logger.debug(
-        "Will split model with these nodes for layer-wise quantization: {}".format(
-            [node.name for node in split_nodes]
-        )
+        "Will split model with these nodes for layer-wise quantization: {}".format([node.name for node in split_nodes])
     )
 
     split_idx = 1
@@ -129,8 +128,12 @@ def layer_wise_quant(
         logger.info("Quantize split model {}".format(split_idx))
         if require_data_reader:
             # process data_reader for current split and next split
-            current_data_reader = _filter_data_reader_for_current_split_model(split_model_part_1.model, current_data_reader)
-            next_data_reader = _prepare_data_reader_for_next_split_model(split_model_part_1.model_path, current_data_reader, providers)
+            current_data_reader = _filter_data_reader_for_current_split_model(
+                split_model_part_1.model, current_data_reader
+            )
+            next_data_reader = _prepare_data_reader_for_next_split_model(
+                split_model_part_1.model_path, current_data_reader, providers
+            )
             lwq_data_reader.append(next_data_reader)
 
             # perform quantization
@@ -144,18 +147,17 @@ def layer_wise_quant(
         else:
             # perform quantization
             split_model_part_1_quantized = quant_func(
-                split_model_part_1,
-                weight_config=weight_config,
-                return_modelproto=False,
-                **kwargs
+                split_model_part_1, weight_config=weight_config, return_modelproto=False, **kwargs
             )
 
         # check split model is valid
         try:
             ort.InferenceSession(split_model_part_1_quantized.model.SerializeToString(), providers=providers)
         except Exception as e:
-            logger.error("Layer-wise quantized model {} can't be inferred correctly. "
-                         "Please check the raise exception".format(split_idx))
+            logger.error(
+                "Layer-wise quantized model {} can't be inferred correctly. "
+                "Please check the raise exception".format(split_idx)
+            )
             raise e
 
         # merge split quantized model
@@ -174,7 +176,9 @@ def layer_wise_quant(
             if require_data_reader:
                 # process data_reader for current split
                 current_data_reader = lwq_data_reader.pop(0)
-                current_data_reader = _filter_data_reader_for_current_split_model(split_model_part_2.model, current_data_reader)
+                current_data_reader = _filter_data_reader_for_current_split_model(
+                    split_model_part_2.model, current_data_reader
+                )
 
                 # perform quantization
                 split_model_part_2_quantized = quant_func(
@@ -187,18 +191,17 @@ def layer_wise_quant(
             else:
                 # perform quantization
                 split_model_part_2_quantized = quant_func(
-                    split_model_part_2,
-                    weight_config=weight_config,
-                    return_modelproto=False,
-                    **kwargs
+                    split_model_part_2, weight_config=weight_config, return_modelproto=False, **kwargs
                 )
 
             # check split model is valid
            try:
                 ort.InferenceSession(split_model_part_2_quantized.model.SerializeToString(), providers=providers)
             except Exception as e:
-                logger.error("Layer-wise quantized model {} can't be inferred correctly. "
-                             "Please check the raise exception".format(split_idx))
+                logger.error(
+                    "Layer-wise quantized model {} can't be inferred correctly. "
+                    "Please check the raise exception".format(split_idx)
+                )
                 raise e
 
             # merge split quantized model
@@ -210,6 +213,7 @@ def layer_wise_quant(
 
     # reload external data to prevent external data file path errors
     from onnx.external_data_helper import load_external_data_for_model
+
     load_external_data_for_model(quantized_model_merged.model, os.path.dirname(quantized_model_merged.model_path))
 
     return quantized_model_merged
@@ -246,13 +250,12 @@ def _filter_data_reader_for_current_split_model(model: onnx.ModelProto, data_rea
         if not inputs:
             break
         filter_input = {
-            input_name: input_tensor
-            for input_name, input_tensor in inputs.items()
-            if input_name in input_names
+            input_name: input_tensor for input_name, input_tensor in inputs.items() if input_name in input_names
         }
         filter_inputs.append(filter_input)
     return DataReader(filter_inputs)
 
+
 def _prepare_data_reader_for_next_split_model(
     model_path: str,
     data_reader: CalibrationDataReader,

neural_compressor/onnxrt/algorithms/weight_only/gptq.py

Lines changed: 4 additions & 5 deletions
@@ -439,13 +439,12 @@ def apply_gptq_on_model(
             quant_func=gptq_quantize,
             weight_config=quant_config,
             data_reader=calibration_data_reader,
-            **quant_kwargs)
+            **quant_kwargs
+        )
     else:
         quantized_model = gptq_quantize(
-            model,
-            data_reader=calibration_data_reader,
-            weight_config=quant_config,
-            **quant_kwargs)
+            model, data_reader=calibration_data_reader, weight_config=quant_config, **quant_kwargs
+        )
 
     if isinstance(quantized_model, ONNXModel):
         quantized_model = quantized_model.model

neural_compressor/onnxrt/algorithms/weight_only/rtn.py

Lines changed: 2 additions & 4 deletions
@@ -213,11 +213,9 @@ def apply_rtn_on_model(model: Union[onnx.ModelProto, ONNXModel, Path, str], quan
     if layer_wise:
         from neural_compressor.onnxrt.algorithms import layer_wise_quant
 
-        quantized_model = layer_wise_quant(
-            model, quant_func=rtn_quantize, weight_config=quant_config, **quant_kwargs)
+        quantized_model = layer_wise_quant(model, quant_func=rtn_quantize, weight_config=quant_config, **quant_kwargs)
     else:
-        quantized_model = rtn_quantize(
-            model, weight_config=quant_config, **quant_kwargs)
+        quantized_model = rtn_quantize(model, weight_config=quant_config, **quant_kwargs)
 
     if isinstance(quantized_model, ONNXModel):
         quantized_model = quantized_model.model

neural_compressor/onnxrt/utils/onnx_model.py

Lines changed: 4 additions & 3 deletions
@@ -74,6 +74,7 @@ def model_path(self, path):
     def check_is_large_model(self):
         """Check model > 2GB."""
         from neural_compressor.onnxrt.utils.utility import MAXIMUM_PROTOBUF
+
         init_size = 0
         for init in self.model.graph.initializer:
             # if initializer has external data location, return True
@@ -417,7 +418,9 @@ def topological_sort(self, enable_subgraph=False):
     def get_nodes_chain(self, start, stop, result_chain=[]):
         """Get nodes chain with given start node and stop node."""
         from collections import deque
+
         from onnx import NodeProto
+
         from neural_compressor.onnxrt.utils.utility import find_by_name
 
         # process start node list
@@ -818,9 +821,7 @@ def find_split_nodes(self):
         split_nodes = self.find_split_node_for_layer_wise_quantization()
         return split_nodes
 
-    def split_model_with_node(
-        self, split_node_name, path_of_model_to_split, save_both_split_models=True
-    ):
+    def split_model_with_node(self, split_node_name, path_of_model_to_split, save_both_split_models=True):
         """Split model into two parts at a given node.
 
         Args:

neural_compressor/onnxrt/utils/utility.py

Lines changed: 2 additions & 1 deletion
@@ -17,8 +17,8 @@
 
 import numpy as np
 import onnx
-from packaging.version import Version
 import onnxruntime.tools.symbolic_shape_infer as symbolic_shape_infer
+from packaging.version import Version
 
 from neural_compressor.common import Logger
 
@@ -274,6 +274,7 @@ def quantize_data(data, quantize_range, qType, scheme):
     quantized_data = _quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point)
     return rmin, rmax, zero_point, scale, quantized_data
 
+
 def check_model_with_infer_shapes(model):
     """Check if the model has been shape inferred."""
     from neural_compressor.onnxrt.utils.onnx_model import ONNXModel

test/3x/onnxrt/quantization/layer_wise/test_layer_wise.py

Lines changed: 7 additions & 5 deletions
@@ -1,17 +1,17 @@
 import os
-import torch
 import shutil
 import unittest
 from copy import deepcopy
-from transformers import AutoTokenizer
 
 import onnx
-from optimum.exporters.onnx import main_export
 import onnxruntime as ort
 import onnxruntime.tools.symbolic_shape_infer as symbolic_shape_infer
+import torch
+from optimum.exporters.onnx import main_export
+from transformers import AutoTokenizer
 
-from neural_compressor.onnxrt.quantization.calibrate import CalibrationDataReader
 from neural_compressor.common import Logger
+from neural_compressor.onnxrt.quantization.calibrate import CalibrationDataReader
 
 logger = Logger().get_logger()
 
@@ -24,6 +24,7 @@ def find_onnx_file(folder_path):
                 return os.path.join(root, file)
     return None
 
+
 class DummyNLPDataloader(CalibrationDataReader):
     def __init__(self, model_name):
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -51,6 +52,7 @@ def get_next(self):
     def rewind(self):
         self.iter_next = iter(self.encoded_list)
 
+
 class TestLayerWiseQuant(unittest.TestCase):
     @classmethod
     def setUpClass(self):
@@ -60,7 +62,7 @@ def setUpClass(self):
 
         model = onnx.load(model_path)
         model = symbolic_shape_infer.SymbolicShapeInference.infer_shapes(model, auto_merge=True)
-        infer_shape_model_path = 'llama-2-tiny/model-infer-shape.onnx'
+        infer_shape_model_path = "llama-2-tiny/model-infer-shape.onnx"
         onnx.save(model, infer_shape_model_path)
 
         sess_options = ort.SessionOptions()

0 commit comments

Comments
 (0)