From 948d72ce32b0a1c87a146cc67e3918d4e6f07f28 Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Mon, 9 Dec 2024 17:19:38 +0200 Subject: [PATCH 1/3] init model_wrapper --- nncf/common/factory.py | 20 +++++- nncf/common/model.py | 67 +++++++++++++++++++ nncf/onnx/quantization/quantize_model.py | 12 ++-- .../openvino/quantization/quantize_ifmodel.py | 5 +- nncf/openvino/quantization/quantize_model.py | 10 +-- nncf/quantization/algorithms/algorithm.py | 9 +-- .../algorithms/bias_correction/algorithm.py | 11 +-- .../algorithms/channel_alignment/algorithm.py | 6 +- .../fast_bias_correction/algorithm.py | 17 +++-- .../algorithms/min_max/algorithm.py | 23 ++++--- nncf/quantization/algorithms/pipeline.py | 61 ++++++----------- .../algorithms/post_training/algorithm.py | 3 +- .../algorithms/smooth_quant/algorithm.py | 15 +++-- nncf/torch/quantization/quantize_model.py | 11 +-- .../test_templates/test_bias_correction.py | 12 ++-- .../test_templates/test_channel_alignment.py | 3 +- .../test_fast_bias_correction.py | 7 +- .../test_templates/test_ptq_params.py | 11 ++- .../test_templates/test_smooth_quant.py | 8 +-- tests/onnx/quantization/common.py | 6 +- .../test_fq_params_calculation.py | 7 +- .../native/quantization/test_graphs.py | 7 +- .../ptq/test_calculation_quantizer_params.py | 9 ++- tests/torch/ptq/test_fq_params_calculation.py | 5 +- tests/torch/ptq/test_graphs.py | 6 +- 25 files changed, 220 insertions(+), 131 deletions(-) create mode 100644 nncf/common/model.py diff --git a/nncf/common/factory.py b/nncf/common/factory.py index c5a921c8068..6f17dfc7fbd 100644 --- a/nncf/common/factory.py +++ b/nncf/common/factory.py @@ -9,7 +9,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TypeVar +import os +from typing import Any, Dict, Optional, Tuple, TypeVar import nncf from nncf.common.engine import Engine @@ -26,13 +27,20 @@ class NNCFGraphFactory: @staticmethod - def create(model: TModel) -> NNCFGraph: + def create( + model: TModel, input_args: Optional[Tuple[Any, ...]] = None, input_kwargs: Optional[Dict[str, Any]] = None + ) -> NNCFGraph: """ Factory method to create backend-specific NNCFGraph instance based on the input model. :param model: backend-specific model instance :return: backend-specific NNCFGraph instance """ + if input_args is None: + input_args = () + if input_kwargs is None: + input_kwargs = {} + model_backend = get_backend(model) if model_backend == BackendType.ONNX: from nncf.onnx.graph.nncf_graph_builder import GraphConverter @@ -47,7 +55,13 @@ def create(model: TModel) -> NNCFGraph: return GraphConverter.create_nncf_graph(model) if model_backend == BackendType.TORCH: - return model.nncf.get_graph() + if os.getenv("NNCF_EXPERIMENTAL_TORCH_TRACING") is None: + return model.nncf.get_graph() + else: + from nncf.experimental.torch2.function_hook.nncf_graph.nncf_graph_builder import build_nncf_graph + + return build_nncf_graph(model, *input_args, **input_kwargs) + raise nncf.UnsupportedBackendError( "Cannot create backend-specific graph because {} is not supported!".format(model_backend.value) ) diff --git a/nncf/common/model.py b/nncf/common/model.py new file mode 100644 index 00000000000..af29c876c11 --- /dev/null +++ b/nncf/common/model.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Any, Dict, Optional, TypeVar + +from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph + +TModel = TypeVar("TModel") + + +class StateAttributes: + """ + The state attributes. + """ + + EXAMPLE_INPUT_ARGS = "example_input_args" + EXAMPLE_INPUT_KWARGS = "example_input_kwargs" + + +class ModelWrapper: + """ + A wrapper class for the original model. + + :param _model: The original model to be wrapped. + :param _graph: The graph representation of the model. + :param state: The storage of the model state. + """ + + def __init__( + self, model: TModel, graph: Optional[NNCFGraph] = None, state: Optional[Dict[str, Any]] = None + ) -> None: + self._model = model + self._graph = graph + self.state = state if state is not None else {} + + @property + def model(self) -> TModel: + """ + Retrieves the original model. + """ + return self._model + + @property + def graph(self) -> NNCFGraph: + """ + Returns the NNCFGraph representation of the model. + + If the graph has not been created yet, it will be created using the model, + example input arguments, and example input keyword arguments stored in the state. + """ + if self._graph is None: + self._graph = NNCFGraphFactory.create( + model=self.model, + input_args=self.state.get(StateAttributes.EXAMPLE_INPUT_ARGS), + input_kwargs=self.state.get(StateAttributes.EXAMPLE_INPUT_KWARGS), + ) + return self._graph diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index 7a4665d1a0c..05f709939ed 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -15,10 +15,10 @@ import nncf from nncf.common.logging.logger import nncf_logger +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.onnx.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS -from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.parameters import DropType from nncf.parameters import ModelType from nncf.parameters import QuantizationMode @@ -78,11 +78,13 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) - graph = GraphConverter.create_nncf_graph(model) - warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) - quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) + model_wrapper = ModelWrapper(model) + warning_model_no_batchwise_support( + model_wrapper.graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) + quantized_model = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset) - return quantized_model + return quantized_model.model def quantize_with_accuracy_control_impl( diff --git a/nncf/openvino/quantization/quantize_ifmodel.py b/nncf/openvino/quantization/quantize_ifmodel.py index 07d22171a17..3fa652bef21 100644 --- a/nncf/openvino/quantization/quantize_ifmodel.py +++ b/nncf/openvino/quantization/quantize_ifmodel.py @@ -25,6 +25,7 @@ from 
nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype from nncf.openvino.graph.model_utils import remove_friendly_name_duplicates @@ -155,7 +156,9 @@ def apply_algorithm_if_bodies( """ nncf_logger.info(f"Iteration [{current_model_num}/{len(graphs)}] ...") parent_graph = graphs[graph_id] - quantized_model = algorithm.apply(parent_model, parent_graph, parent_statistic_points, parent_dataset) + quantized_model = algorithm.apply( + ModelWrapper(parent_model, parent_graph), parent_statistic_points, parent_dataset + ).model if get_number_if_op(parent_model) == 0: return quantized_model, current_model_num model_transformer_fp32 = factory.ModelTransformerFactory.create(parent_model) diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 46db1c50cca..24b96e1e6ae 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -19,13 +19,13 @@ from nncf.common.factory import NNCFGraphFactory from nncf.common.factory import StatisticsAggregatorFactory from nncf.common.logging import nncf_logger +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.openvino.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype from nncf.openvino.graph.model_utils import remove_friendly_name_duplicates -from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.graph.node_utils import get_number_if_op from nncf.openvino.quantization.backend_parameters import BackendParameters from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed @@ -166,9 +166,11 @@ def native_quantize_impl( ignored_scope=ignored_scope, advanced_parameters=advanced_parameters, ) - graph = GraphConverter.create_nncf_graph(model) - warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) - quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) + model_wrapper = ModelWrapper(model) + warning_model_no_batchwise_support( + model_wrapper.graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) + quantized_model = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset).model if is_weight_compression_needed(advanced_parameters): compress_quantize_weights_transformation(quantized_model) diff --git a/nncf/quantization/algorithms/algorithm.py b/nncf/quantization/algorithms/algorithm.py index befe0a82f9d..f5fe6896971 100644 --- a/nncf/quantization/algorithms/algorithm.py +++ b/nncf/quantization/algorithms/algorithm.py @@ -14,7 +14,7 @@ from typing import List, Optional, TypeVar from nncf import Dataset -from nncf.common.graph.graph import NNCFGraph +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -38,8 +38,7 @@ def available_backends(self) -> List[BackendType]: @abstractmethod def apply( 
self, - model: TModel, - graph: NNCFGraph, + model: ModelWrapper, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: @@ -47,18 +46,16 @@ def apply( Applies the algorithm to the model. :param model: Model for applying algorithm. - :param graph: Model graph. :param statistic_points: Statistic points with collected statistics values. :param dataset: A representative dataset for the calibration process. :return: A resulting model. """ @abstractmethod - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model: ModelWrapper) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. :param model: Model for statistics collection. - :param graph: Model graph. :return: Statistic points, for which StatisticsCollector should collect statistics. """ diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 63db2ee0adf..fdfba42ace5 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -25,6 +25,7 @@ from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.logging.track_progress import track +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -133,11 +134,11 @@ def _set_backend_entity(self, model: TModel) -> None: def apply( self, - model: TModel, - graph: NNCFGraph, + model_wrapper: ModelWrapper, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: + model = model_wrapper.model self._set_backend_entity(model) main_transformations_layout = TransformationLayout() main_model_transformer = ModelTransformerFactory.create(model) @@ -553,8 +554,10 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return output_fp - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: - self._set_backend_entity(model) + def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer: + model = model_wrapper.model + graph = model_wrapper.graph + self._set_backend_entity(model_wrapper.model) statistic_container = StatisticPointsContainer() nodes_with_bias = [ diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index b30749b6d2c..2a0fb0f4a1b 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -25,6 +25,7 @@ from nncf.common.graph.utils import get_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -381,8 +382,9 @@ def _get_target_point_and_node_in(self, conv_in, add_in) -> Tuple[TargetPoint, N node_in, ) - def get_statistic_points(self, 
model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
-        self._set_backend_entity(model)
+    def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer:
+        self._set_backend_entity(model_wrapper.model)
+        graph = model_wrapper.graph
         statistic_container = StatisticPointsContainer()

         for conv_in, add_in, _ in self._get_node_pairs(graph):
diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py
index 3d104cad3c9..40c8a87a364 100644
--- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py
+++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py
@@ -16,13 +16,13 @@
 from nncf import Dataset
 from nncf.common.factory import EngineFactory
 from nncf.common.factory import ModelTransformerFactory
-from nncf.common.graph.graph import NNCFGraph
 from nncf.common.graph.model_transformer import ModelTransformer
 from nncf.common.graph.transformations.commands import TargetPoint
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.layout import TransformationLayout
 from nncf.common.logging import nncf_logger
 from nncf.common.logging.track_progress import track
+from nncf.common.model import ModelWrapper
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
 from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
 from nncf.common.utils.backend import BackendType
@@ -129,11 +129,12 @@ def _set_backend_entity(self, model: TModel) -> None:

     def apply(
         self,
-        model: TModel,
-        graph: NNCFGraph,
+        model_wrapper: ModelWrapper,
         statistic_points: Optional[StatisticPointsContainer] = None,
         dataset: Optional[Dataset] = None,
-    ) -> TModel:
+    ) -> ModelWrapper:
+        model = model_wrapper.model
+        graph = model_wrapper.graph
         self._set_backend_entity(model)

         model_transformer = ModelTransformerFactory.create(model)
@@ -207,7 +208,9 @@ def apply(
         transformation_layout.register(self._backend_entity.create_bias_correction_command(node, bias_value, graph))

         transformed_model = model_transformer.transform(transformation_layout)
-        return transformed_model
+        return ModelWrapper(
+            model=transformed_model, graph=graph, state=model_wrapper.state  # BC does not change the model's graph
+        )

     @staticmethod
     def _get_bias_shift_magnitude(current_bias_value: Tensor, updated_bias_value: Tensor) -> Tensor:
@@ -345,7 +348,9 @@ def _get_bias_shift(
         bias_shift = fns.stack(output_fp) - q_outputs
         return bias_shift

-    def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
+    def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer:
+        model = model_wrapper.model
+        graph = model_wrapper.graph
         self._set_backend_entity(model)
         nodes_with_bias = [
             node for node in graph.get_all_nodes() if self._backend_entity.is_node_with_bias(node, graph)
diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py
index dea9211b734..b802ea4536e 100644
--- a/nncf/quantization/algorithms/min_max/algorithm.py
+++ b/nncf/quantization/algorithms/min_max/algorithm.py
@@ -31,6 +31,7 @@
 from nncf.common.hardware.config import get_hw_config_type
 from nncf.common.insertion_point_graph import InsertionPointGraph
 from nncf.common.logging import nncf_logger
+from nncf.common.model import ModelWrapper
 from nncf.common.quantization.config_assignment import assign_qconfig_lists_to_modules
 from nncf.common.quantization.initialization.range 
import RangeInitCollectorParams from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule @@ -889,14 +890,16 @@ def _get_quantization_points_overflow_fix( def apply( self, - model: TModel, - graph: NNCFGraph, + model_wrapper: ModelWrapper, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: + ) -> ModelWrapper: transformation_layout = TransformationLayout() - model_transformer = ModelTransformerFactory.create(model) - quantization_target_points, unified_scale_groups = self._get_quantization_target_points(model, graph) + model_transformer = ModelTransformerFactory.create(model_wrapper.model) + graph = model_wrapper.graph + quantization_target_points, unified_scale_groups = self._get_quantization_target_points( + model_wrapper.model, graph + ) quantization_points_overflow_fix = self._get_quantization_points_overflow_fix( self._overflow_fix, quantization_target_points, graph ) @@ -987,12 +990,12 @@ def filter_func(point: StatisticPoint) -> bool: if not transformation_layout.transformations: nncf_logger.info("The model has no operations to apply quantization.") quantized_model = model_transformer.transform(transformation_layout) - return quantized_model + return ModelWrapper(quantized_model, state=model_wrapper.state) - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: - self._set_backend_entity(model) + def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer: + self._set_backend_entity(model_wrapper.model) self._reset_cache() - quantization_target_points, _ = self._get_quantization_target_points(model, graph) + quantization_target_points, _ = self._get_quantization_target_points(model_wrapper.model, model_wrapper.graph) output = StatisticPointsContainer() for quantization_target_point, qconfig in quantization_target_points.items(): nncf_logger.debug( @@ -1000,7 +1003,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin f" with type {quantization_target_point.type} for statistics collection" ) stat_collector = self._get_stat_collector( - graph, quantization_target_point, qconfig, self._batchwise_statistics + model_wrapper.graph, quantization_target_point, qconfig, self._batchwise_statistics ) output.add_statistic_point( StatisticPoint( diff --git a/nncf/quantization/algorithms/pipeline.py b/nncf/quantization/algorithms/pipeline.py index cd615258553..ae9d4276a3c 100644 --- a/nncf/quantization/algorithms/pipeline.py +++ b/nncf/quantization/algorithms/pipeline.py @@ -11,10 +11,9 @@ from typing import Dict, List, Optional, TypeVar, Union -from nncf.common.factory import NNCFGraphFactory from nncf.common.factory import StatisticsAggregatorFactory -from nncf.common.graph.graph import NNCFGraph from nncf.common.logging import nncf_logger +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend @@ -27,8 +26,7 @@ def collect_statistics( containers: Union[StatisticPointsContainer, List[StatisticPointsContainer]], - model: TModel, - graph: NNCFGraph, + model_state: ModelWrapper, dataset: Dataset, ) -> StatisticPointsContainer: """ @@ -36,17 +34,17 @@ def collect_statistics( :param statistic_points: Statistic points that need to be collected. :param model: A model. - :param graph: A graph assosiated with a model. 
+ :param graph: A graph associated with a model. :param dataset: A dataset. :return: Collected statistics. """ if not isinstance(containers, list): containers = [containers] - statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator = StatisticsAggregatorFactory.create(model_state.model, dataset) for container in containers: statistics_aggregator.register_statistic_points(container) - statistics_aggregator.collect_statistics(model, graph) + statistics_aggregator.collect_statistics(model_state.model, model_state.graph) return statistics_aggregator.statistic_points @@ -96,8 +94,7 @@ def run_step( self, step_index: int, step_statistics: StatisticPointsContainer, - model: TModel, - graph: NNCFGraph, + model_wrapper: ModelWrapper, ) -> TModel: """ Executes a provided pipeline step on the provided model. @@ -105,36 +102,31 @@ def run_step( :param step_index: Zero-based index of the pipeline step that should be executed :param step_statistics: Statistics required to execute a pipeline step. :param model: A model to which a pipeline step will be applied. - :param graph: A graph assosiated with a model. + :param graph: A graph associated with a model. :return: The updated model after executing the pipeline step. """ - current_model = model - current_graph = graph + current_model = model_wrapper - pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) + pipeline_steps = self._remove_unsupported_algorithms(get_backend(model_wrapper.model)) pipeline_step = pipeline_steps[step_index] - for algorithm in pipeline_step[:-1]: - current_model = algorithm.apply(current_model, current_graph, step_statistics) - current_graph = NNCFGraphFactory.create(current_model) - current_model = pipeline_step[-1].apply(current_model, current_graph, step_statistics) - + for algorithm in pipeline_step: + current_model = algorithm.apply(current_model, step_statistics) return current_model def run_from_step( self, - model: TModel, + model: ModelWrapper, dataset: Dataset, - graph: Optional[NNCFGraph] = None, start_step_index: int = 0, step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None, - ) -> TModel: + ) -> ModelWrapper: """ Executes the pipeline from the specified pipeline step to the end. :param model: This is the model after the (start_step_index - 1)-th pipeline step, or the initial model if start_step_index is 0. :param dataset: A dataset that holds the data items for pipeline steps. - :param graph: A graph assosiated with a model. + :param graph: A graph associated with a model. :param start_step_index: Zero-based pipeline step index from which the pipeline should be executed. :param step_index_to_statistics: A mapping from pipeline step index to statistics @@ -142,47 +134,38 @@ def run_from_step( :return: The updated model after executing the pipeline from the specified pipeline step to the end. 
""" - pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) + pipeline_steps = self._remove_unsupported_algorithms(get_backend(model.model)) if step_index_to_statistics is None: step_index_to_statistics = {} # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step step_model = model - step_graph = graph for step_index in range(start_step_index, len(pipeline_steps)): - # Create graph required to run current pipeline step - if step_graph is None: - step_graph = NNCFGraphFactory.create(step_model) - # Collect statistics required to run current pipeline step step_statistics = step_index_to_statistics.get(step_index) if step_statistics is None: - statistic_points = self.get_statistic_points_for_step(step_index, step_model, step_graph) - step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) + statistic_points = self.get_statistic_points_for_step(step_index, step_model) + step_statistics = collect_statistics(statistic_points, step_model, dataset) # Run current pipeline step - step_model = self.run_step(step_index, step_statistics, step_model, step_graph) - - step_graph = None # We should rebuild the graph for the next pipeline step + step_model = self.run_step(step_index, step_statistics, step_model) return step_model - def get_statistic_points_for_step( - self, step_index: int, model: TModel, graph: NNCFGraph - ) -> StatisticPointsContainer: + def get_statistic_points_for_step(self, step_index: int, model_wrapper: ModelWrapper) -> StatisticPointsContainer: """ Returns statistics that should be collected to execute `step_index`-th pipeline step. :param step_index: Zero-based index of the pipeline step. :param model: A model. - :param graph: A graph assosiated with a model. + :param graph: A graph associated with a model. :return: Statistics that should be collected to execute `step_index`-th pipeline step. 
""" container = StatisticPointsContainer() - pipeline_steps = self._remove_unsupported_algorithms(get_backend(model)) + pipeline_steps = self._remove_unsupported_algorithms(get_backend(model_wrapper.model)) pipeline_step = pipeline_steps[step_index] for algorithm in pipeline_step: - for statistic_points in algorithm.get_statistic_points(model, graph).values(): + for statistic_points in algorithm.get_statistic_points(model_wrapper).values(): for statistic_point in statistic_points: container.add_statistic_point(statistic_point) diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 862dc5d5037..9fd02014770 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -95,7 +95,6 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin def apply( self, model: TModel, - graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: @@ -109,4 +108,4 @@ def apply( if statistic_points: step_index_to_statistics = {0: statistic_points} - return self._pipeline.run_from_step(model, dataset, graph, 0, step_index_to_statistics) + return self._pipeline.run_from_step(model, dataset, 0, step_index_to_statistics) diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 83aefc6709a..77ab1d8ed9b 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -24,6 +24,7 @@ from nncf.common.graph.utils import get_reduction_axes from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType @@ -98,11 +99,12 @@ def _set_backend_entity(self, model: TModel) -> None: def apply( self, - model: TModel, - graph: NNCFGraph, + model_wrapper: ModelWrapper, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: + ) -> ModelWrapper: + model = model_wrapper.model + graph = model_wrapper.graph self._set_backend_entity(model) alpha_map = self._get_alpha_map() @@ -176,7 +178,7 @@ def apply( transformation_layout.register(scale_insertion_command) transformed_model = model_transformer.transform(transformation_layout) - return transformed_model + return ModelWrapper(model=transformed_model, state=model_wrapper.state) @staticmethod def _calculate_scale_and_ratio( @@ -245,7 +247,10 @@ def _get_statistics_for_node( statistics_for_node.append(statistic) return statistics_for_node - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: + def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer: + model = model_wrapper.model + graph = model_wrapper.graph + statistic_container = StatisticPointsContainer() self._set_backend_entity(model) diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index 23cb451f5fe..fb3ee792730 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -16,6 +16,7 @@ import nncf from nncf.common.factory import NNCFGraphFactory +from 
nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.parameters import BackupMode @@ -32,6 +33,7 @@ from nncf.scopes import IgnoredScope from nncf.torch.graph.operator_metatypes import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from nncf.torch.model_creation import wrap_model +from nncf.torch.nncf_network import NNCFNetwork DEFAULT_RANGE_TYPE = "mean_min_max" @@ -72,12 +74,13 @@ def quantize_impl( ignored_scope=ignored_scope, advanced_parameters=advanced_parameters, ) - graph = nncf_network.nncf.get_graph() - warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) - quantized_model = quantization_algorithm.apply(nncf_network, graph, dataset=calibration_dataset) + model_wrapper = ModelWrapper(nncf_network) + warning_model_no_batchwise_support( + model_wrapper.graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS + ) + quantized_model: NNCFNetwork = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset).model quantized_model.nncf.disable_dynamic_graph_building() - return quantized_model diff --git a/tests/cross_fw/test_templates/test_bias_correction.py b/tests/cross_fw/test_templates/test_bias_correction.py index 81b638eb900..6fe670c7496 100644 --- a/tests/cross_fw/test_templates/test_bias_correction.py +++ b/tests/cross_fw/test_templates/test_bias_correction.py @@ -15,6 +15,7 @@ import pytest from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.data import Dataset from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix @@ -121,9 +122,8 @@ def quantized_test_model(self, tmpdir) -> TModel: dataset = Dataset(self.get_dataset(model_cls.INPUT_SIZE), self.get_transform_fn()) quantization_algorithm = self.get_quantization_algorithm(disable_bias_correction=True) - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) - modified_model = self.remove_fq_from_inputs(quantized_model) + quantized_model = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) + modified_model = self.remove_fq_from_inputs(quantized_model.model) return modified_model @pytest.mark.parametrize( @@ -150,8 +150,7 @@ def test_update_bias(self, model_cls, ref_biases, tmpdir): dataset = Dataset(self.get_dataset(model_cls.INPUT_SIZE), self.get_transform_fn()) quantization_algorithm = self.get_quantization_algorithm() - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantized_model = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) mapped_ref_biases = self.map_references(ref_biases, model_cls) self.check_bias(quantized_model, mapped_ref_biases) @@ -171,10 +170,9 @@ def test__get_subgraph_data_for_node(self, quantized_test_model, layer_name, ref def test_verify_collected_stat_inputs_map(self, model_cls, ref_stat_inputs_map, tmpdir): model = self.backend_specific_model(model_cls(), tmpdir) - graph = NNCFGraphFactory.create(model) bc_algo = self.get_bias_correction_algorithm() - bc_algo.get_statistic_points(model, graph) + bc_algo.get_statistic_points(ModelWrapper(model)) collected_stat_inputs_map = getattr(bc_algo, "_collected_stat_inputs_map") assert collected_stat_inputs_map == ref_stat_inputs_map diff --git 
a/tests/cross_fw/test_templates/test_channel_alignment.py b/tests/cross_fw/test_templates/test_channel_alignment.py index 7995f91961c..373fe356802 100644 --- a/tests/cross_fw/test_templates/test_channel_alignment.py +++ b/tests/cross_fw/test_templates/test_channel_alignment.py @@ -22,6 +22,7 @@ from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationType +from nncf.common.model import ModelWrapper from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic @@ -511,7 +512,7 @@ class MockBackend(backend_cls): MockBackend.get_statistic_collector = mocker.MagicMock(return_value=ref_stat_collector) algorithm._backend_entity = MockBackend - statistic_container = algorithm.get_statistic_points(None, nncf_graph) + statistic_container = algorithm.get_statistic_points(ModelWrapper(None, nncf_graph)) backend_cls = self.get_backend_cls() target_node_name = "/Add_1_0" if num_biases else "/Conv_1_0" diff --git a/tests/cross_fw/test_templates/test_fast_bias_correction.py b/tests/cross_fw/test_templates/test_fast_bias_correction.py index 899be7d9a1a..22c91654c11 100644 --- a/tests/cross_fw/test_templates/test_fast_bias_correction.py +++ b/tests/cross_fw/test_templates/test_fast_bias_correction.py @@ -14,7 +14,7 @@ import pytest -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection @@ -115,7 +115,6 @@ def test_update_bias(self, model_cls, ref_bias, tmpdir): dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) quantization_algorithm = self.get_quantization_algorithm() - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantized_model = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) - self.check_bias(quantized_model, ref_bias) + self.check_bias(quantized_model.model, ref_bias) diff --git a/tests/cross_fw/test_templates/test_ptq_params.py b/tests/cross_fw/test_templates/test_ptq_params.py index eacf57652e7..d8989a36d94 100644 --- a/tests/cross_fw/test_templates/test_ptq_params.py +++ b/tests/cross_fw/test_templates/test_ptq_params.py @@ -21,6 +21,7 @@ from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.operator_metatypes import OutputNoopMetatype from nncf.common.graph.transformations.commands import TargetType +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig @@ -203,7 +204,7 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == range_estimator_params params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"]) + stat_points = 
min_max_algo.get_statistic_points(ModelWrapper(params["model"], params["nncf_graph"])) assert len(stat_points) == params["stat_points_num"] for _, stat_point in stat_points.items(): @@ -374,7 +375,7 @@ def test_unified_scales_command_creation(self, mocker): Tensor(self.get_backend_tensor(idx - 1)), Tensor(self.get_backend_tensor(idx + 2)) ) stats.add_statistic_point(StatisticPoint(tp, tc, algo._algorithm_key)) - algo.apply(model, model.nncf_graph, stats) + algo.apply(ModelWrapper(model, model.nncf_graph), stats) mock_transformer.transform.assert_called_once() layout = mock_transformer.transform.call_args.args[0] self.check_unified_scale_layout(layout, unified_scales_group) @@ -423,7 +424,5 @@ def test_empty_statistics(self, mode, mocker): "nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization._get_quantization_points_overflow_fix", return_value=mocker.MagicMock(), ) - with pytest.raises(nncf.InternalError) as exc_info: - algo.apply(None, None, stat_points) - - assert str(exc_info.value) == "Statistics were not collected for the node A" + with pytest.raises(nncf.InternalError, match="Statistics were not collected for the node A"): + algo.apply(mocker.MagicMock(), stat_points) diff --git a/tests/cross_fw/test_templates/test_smooth_quant.py b/tests/cross_fw/test_templates/test_smooth_quant.py index f4ea260c14e..69ab9096758 100644 --- a/tests/cross_fw/test_templates/test_smooth_quant.py +++ b/tests/cross_fw/test_templates/test_smooth_quant.py @@ -19,6 +19,7 @@ from nncf.common.factory import NNCFGraphFactory from nncf.common.factory import StatisticsAggregatorFactory from nncf.common.graph.graph import NNCFNode +from nncf.common.model import ModelWrapper from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.parameters import ModelType @@ -165,8 +166,7 @@ def test_smooth_quant_algo(self, model_cls, reference_values, tmpdir): dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) quantization_algorithm = self.get_quantization_algorithm(self.get_ignored_scope(model_cls)) - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantized_model = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset).model self.check_scales(quantized_model, reference_values, model_cls) @@ -246,7 +246,7 @@ def test_empty_stats(self, mocker, tmpdir): graph = NNCFGraphFactory.create(model) algo = SmoothQuant(subset_size=1, inplace_statistics=False) - algo_statistic_points = algo.get_statistic_points(model, graph) + algo_statistic_points = algo.get_statistic_points(ModelWrapper(model)) statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) statistics_aggregator.register_statistic_points(algo_statistic_points) statistics_aggregator.collect_statistics(model, graph) @@ -260,7 +260,7 @@ def test_empty_stats(self, mocker, tmpdir): mocked_transformer = mocker.MagicMock() mocker.patch("nncf.common.factory.ModelTransformerFactory.create", return_value=mocked_transformer) - algo.apply(model, graph, algo_statistic_points) + algo.apply(ModelWrapper(model), algo_statistic_points) mocked_transformer.transform.assert_called_once() arg = mocked_transformer.transform.call_args.args[0] diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index 18f36b29ee4..48bf787f2d1 100644 --- a/tests/onnx/quantization/common.py +++ 
b/tests/onnx/quantization/common.py @@ -16,6 +16,7 @@ import onnx from nncf import Dataset +from nncf.common.model import ModelWrapper from nncf.experimental.common.tensor_statistics.statistics import MinMaxTensorStatistic from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.onnx.graph.onnx_helper import get_edge_dtype @@ -108,7 +109,6 @@ def min_max_quantize_model( ) -> onnx.ModelProto: if convert_model_opset: original_model = convert_opset_version(original_model) - graph = GraphConverter.create_nncf_graph(original_model) dataset = get_random_dataset_for_test(original_model, dataset_has_batch_size) quantization_params = {} if quantization_params is None else quantization_params @@ -123,8 +123,8 @@ def min_max_quantize_model( post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) - quantized_model = post_training_quantization.apply(original_model, graph, dataset=dataset) - return quantized_model + quantized_model = post_training_quantization.apply(ModelWrapper(original_model), dataset=dataset) + return quantized_model.model def ptq_quantize_model( diff --git a/tests/openvino/native/quantization/test_fq_params_calculation.py b/tests/openvino/native/quantization/test_fq_params_calculation.py index 5751a34f39b..e1f0d4f8793 100644 --- a/tests/openvino/native/quantization/test_fq_params_calculation.py +++ b/tests/openvino/native/quantization/test_fq_params_calculation.py @@ -15,6 +15,7 @@ import pytest import torch +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.statistics.aggregator import OVStatisticsAggregator @@ -70,11 +71,11 @@ def quantize_model(ov_model, q_params): min_max_algo = MinMaxQuantization(subset_size=1, **q_params) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = min_max_algo.get_statistic_points(ov_model, graph) + statistic_points = min_max_algo.get_statistic_points(ModelWrapper(ov_model, graph)) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(ov_model, graph) - quantized_model = min_max_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) - return quantized_model + quantized_model = min_max_algo.apply(ModelWrapper(ov_model, graph), statistics_aggregator.statistic_points) + return quantized_model.model @pytest.fixture(params=[True, False], ids=["inplace", "out_of_place"], name="inplace_statistics") diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 7dc3c94c081..2352a009f27 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -18,6 +18,7 @@ import pytest from nncf import Dataset +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.quantization.quantize_model import quantize_impl @@ -137,10 +138,12 @@ def smooth_quant_model(ov_model: ov.Model, q_params: Dict, quantize=True): smooth_quant_algo = SmoothQuant(subset_size=1) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = smooth_quant_algo.get_statistic_points(ov_model, graph) + statistic_points = smooth_quant_algo.get_statistic_points(ModelWrapper(ov_model, graph)) statistics_aggregator.register_statistic_points(statistic_points) 
statistics_aggregator.collect_statistics(ov_model, graph) - modified_model = smooth_quant_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) + modified_model = smooth_quant_algo.apply( + ModelWrapper(ov_model, graph), statistics_aggregator.statistic_points + ).model if quantize: modified_model = quantize_model(modified_model, q_params) diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 556b5f9e387..00f82f0e538 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -20,6 +20,7 @@ from nncf import Dataset from nncf.common.graph.transformations.commands import TargetType +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig @@ -314,16 +315,14 @@ def test_quantizer_parameters_export(tmp_path: Path, _seed): statistics_aggregator = PTStatisticsAggregator(dataset) nncf_network = wrap_model(model, torch.ones([1, 3, 32, 32]), True) - statistic_points = min_max_algo.get_statistic_points(nncf_network, nncf_network.nncf.get_graph()) + statistic_points = min_max_algo.get_statistic_points(ModelWrapper(nncf_network)) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(model, nncf_network.nncf.get_graph()) - torch_quantized_model = min_max_algo.apply( - nncf_network, nncf_network.nncf.get_graph(), statistics_aggregator.statistic_points - ) + torch_quantized_model = min_max_algo.apply(ModelWrapper(nncf_network), statistics_aggregator.statistic_points) path = str(tmp_path / "torch_ptq_model.onnx") torch.onnx.export( - torch_quantized_model, + torch_quantized_model.model, input_data, path, export_params=True, diff --git a/tests/torch/ptq/test_fq_params_calculation.py b/tests/torch/ptq/test_fq_params_calculation.py index 6d71760cd33..9c2bbe861b8 100644 --- a/tests/torch/ptq/test_fq_params_calculation.py +++ b/tests/torch/ptq/test_fq_params_calculation.py @@ -16,6 +16,7 @@ import torch import nncf +from nncf.common.model import ModelWrapper from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.advanced_parameters import OverflowFix @@ -58,8 +59,8 @@ def transform_fn(sample): original_model.eval() nncf_network = wrap_model(original_model, torch.ones([1, 1, 10, 10]), trace_parameters=True) - quantized_model = post_training_quantization.apply(nncf_network, nncf_network.nncf.get_graph(), dataset=dataset) - return quantized_model + quantized_model = post_training_quantization.apply(ModelWrapper(nncf_network), dataset=dataset) + return quantized_model.model def get_fq_nodes(model: NNCFNetwork) -> Dict[Scope, torch.nn.Module]: diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index eba35163c7c..be902f9daa5 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -16,6 +16,7 @@ import torch from nncf import Dataset +from nncf.common.model import ModelWrapper from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -121,8 +122,7 @@ def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_p 
quantization_algorithm = PostTrainingQuantization(**quantization_parameters) quantized_model = quantization_algorithm.apply( - nncf_network, - nncf_network.nncf.get_graph(), + ModelWrapper(nncf_network), dataset=Dataset([example_input]), - ) + ).model check_graph(quantized_model.nncf.get_graph(), desc.dot_filename(), graph_dir) From 92b71439cff10b18a7ee97e489d78418055b547e Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Mon, 16 Dec 2024 01:12:55 +0200 Subject: [PATCH 2/3] model_wrapper --- nncf/common/model.py | 51 +++++++++++++++---- .../torch/fx/quantization/quantize_model.py | 25 +++++---- nncf/onnx/quantization/quantize_model.py | 8 +-- .../openvino/quantization/quantize_ifmodel.py | 12 +++-- nncf/openvino/quantization/quantize_model.py | 19 ++++--- nncf/quantization/algorithms/algorithm.py | 11 ++-- .../algorithms/bias_correction/algorithm.py | 22 ++++---- .../algorithms/channel_alignment/algorithm.py | 11 ++-- .../fast_bias_correction/algorithm.py | 19 +++---- .../fast_bias_correction/torch_backend.py | 2 +- .../fast_bias_correction/torch_fx_backend.py | 2 +- .../algorithms/min_max/algorithm.py | 3 +- nncf/quantization/algorithms/pipeline.py | 12 ++--- .../algorithms/post_training/algorithm.py | 11 ++-- .../algorithms/smooth_quant/algorithm.py | 3 +- .../weight_compression/algorithm.py | 10 ++-- nncf/quantization/quantize_model.py | 4 +- nncf/torch/quantization/quantize_model.py | 11 ++-- .../test_templates/test_bias_correction.py | 12 ++--- .../test_templates/test_channel_alignment.py | 7 ++- .../test_fast_bias_correction.py | 4 +- .../test_templates/test_ptq_params.py | 9 ++-- .../test_templates/test_smooth_quant.py | 8 +-- .../onnx/quantization/test_bias_correction.py | 6 +-- .../quantization/test_fast_bias_correction.py | 6 +-- .../test_fq_params_calculation.py | 9 ++-- .../native/quantization/test_graphs.py | 9 ++-- tests/openvino/native/test_bias_correction.py | 6 +-- .../native/test_fast_bias_correction.py | 6 +-- tests/torch/fx/test_bias_correction.py | 7 ++- tests/torch/fx/test_compress_weights.py | 3 +- tests/torch/fx/test_fast_bias_correction.py | 10 ++-- .../ptq/test_calculation_quantizer_params.py | 4 +- tests/torch/ptq/test_fast_bias_correction.py | 11 ++-- 34 files changed, 198 insertions(+), 155 deletions(-) diff --git a/nncf/common/model.py b/nncf/common/model.py index af29c876c11..b0685180376 100644 --- a/nncf/common/model.py +++ b/nncf/common/model.py @@ -10,21 +10,28 @@ # limitations under the License. -from typing import Any, Dict, Optional, TypeVar +from dataclasses import dataclass +from typing import Any, Dict, Optional, Tuple, TypeVar from nncf.common.factory import NNCFGraphFactory from nncf.common.graph.graph import NNCFGraph +from nncf.common.utils.backend import BackendType +from nncf.common.utils.backend import get_backend TModel = TypeVar("TModel") -class StateAttributes: +@dataclass +class ModelAttributes: """ - The state attributes. + A class to store model attributes. + + :param example_input_args: Example input arguments for the model. + :param example_input_kwargs: Example input keyword arguments for the model. """ - EXAMPLE_INPUT_ARGS = "example_input_args" - EXAMPLE_INPUT_KWARGS = "example_input_kwargs" + example_input_args: Optional[Tuple[Any]] = None + example_input_kwargs: Optional[Dict[str, Any]] = None class ModelWrapper: @@ -33,15 +40,17 @@ class ModelWrapper: :param _model: The original model to be wrapped. :param _graph: The graph representation of the model. - :param state: The storage of the model state. 
+ :param _attributes: The storage of the model attributes. + :param _backend: The backend of the model. """ def __init__( - self, model: TModel, graph: Optional[NNCFGraph] = None, state: Optional[Dict[str, Any]] = None + self, model: TModel, *, graph: Optional[NNCFGraph] = None, attributes: Optional[ModelAttributes] = None ) -> None: self._model = model self._graph = graph - self.state = state if state is not None else {} + self._attributes = attributes or ModelAttributes() + self._backend = get_backend(model) @property def model(self) -> TModel: @@ -60,8 +69,28 @@ def graph(self) -> NNCFGraph: """ if self._graph is None: self._graph = NNCFGraphFactory.create( - model=self.model, - input_args=self.state.get(StateAttributes.EXAMPLE_INPUT_ARGS), - input_kwargs=self.state.get(StateAttributes.EXAMPLE_INPUT_KWARGS), + self.model, self.attributes.example_input_args, self.attributes.example_input_kwargs ) return self._graph + + @property + def attributes(self) -> ModelAttributes: + """ + Retrieves the model attributes. + """ + return self._attributes + + @property + def backend(self) -> BackendType: + """ + Retrieves the model backend. + """ + return self._backend + + def unwrap(self) -> Tuple[TModel, NNCFGraph]: + """ + Retrieves the model and graph. + + :return: A tuple of the model and graph. + """ + return self.model, self.graph diff --git a/nncf/experimental/torch/fx/quantization/quantize_model.py b/nncf/experimental/torch/fx/quantization/quantize_model.py index 00b0be8bac1..db368561d34 100644 --- a/nncf/experimental/torch/fx/quantization/quantize_model.py +++ b/nncf/experimental/torch/fx/quantization/quantize_model.py @@ -10,10 +10,8 @@ # limitations under the License. from copy import deepcopy -from typing import Optional +from typing import Optional, cast -import torch -import torch.fx from torch.ao.quantization.pt2e.duplicate_dq_pass import DuplicateDQPass from torch.ao.quantization.pt2e.port_metadata_pass import PortNodeMetaForQDQ from torch.ao.quantization.pt2e.qat_utils import _fold_conv_bn_qat @@ -22,8 +20,8 @@ from torch.fx.passes.infra.pass_manager import PassManager import nncf -from nncf.common.factory import NNCFGraphFactory from nncf.common.logging import nncf_logger +from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.experimental.torch.fx.quantization.backend_parameters import is_weight_compression_needed @@ -46,7 +44,7 @@ def quantize_impl( - model: torch.fx.GraphModule, + model: GraphModule, calibration_dataset: Dataset, mode: Optional[QuantizationMode] = None, preset: Optional[QuantizationPreset] = None, @@ -56,7 +54,7 @@ def quantize_impl( model_type: Optional[ModelType] = None, ignored_scope: Optional[IgnoredScope] = None, advanced_parameters: Optional[AdvancedQuantizationParameters] = None, -) -> torch.fx.GraphModule: +) -> GraphModule: """ Implementation of the `quantize()` method for the Torch FX backend. """ @@ -86,9 +84,9 @@ def quantize_impl( # To make it easier for bias correction algorithms. 
apply_quantization_transformations(copied_model) - - nncf_graph = NNCFGraphFactory.create(copied_model) - quantized_model = quantization_algorithm.apply(copied_model, nncf_graph, dataset=calibration_dataset) + model_wrapper = ModelWrapper(copied_model) + quantized_model_wrapper = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset) + quantized_model = cast(GraphModule, quantized_model_wrapper.model) if is_weight_compression_needed(advanced_parameters): compress_post_quantize_transformation(quantized_model) @@ -116,7 +114,7 @@ def quantize_impl( def compress_weights_impl( - model: torch.fx.GraphModule, + model: GraphModule, dataset: Dataset, mode: CompressWeightsMode, ratio: float, @@ -131,7 +129,7 @@ def compress_weights_impl( lora_correction: bool, backup_mode: BackupMode, advanced_parameters: Optional[AdvancedCompressionParameters] = None, -) -> torch.fx.GraphModule: +) -> GraphModule: """ Implementation of the `compress_weights()` method for the Torch Fx backend. """ @@ -151,8 +149,9 @@ def compress_weights_impl( backup_mode, advanced_parameters, ) - graph = NNCFGraphFactory.create(model) - compressed_model = compression_algorithm.apply(model, graph, dataset=dataset) + model_wrapper = ModelWrapper(model) + compressed_model_wrapper = compression_algorithm.apply(model_wrapper, dataset=dataset) + compressed_model = compressed_model_wrapper.model compressed_model = GraphModule(compressed_model, compressed_model.graph) compressed_model = _disallow_eval_train(compressed_model) diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index 05f709939ed..d82c8ed5b8a 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union, cast import onnx @@ -82,9 +82,9 @@ def quantize_impl( warning_model_no_batchwise_support( model_wrapper.graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS ) - quantized_model = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset) - - return quantized_model.model + quantized_model_wrapper = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset) + quantized_model = cast(onnx.ModelProto, quantized_model_wrapper.model) + return quantized_model def quantize_with_accuracy_control_impl( diff --git a/nncf/openvino/quantization/quantize_ifmodel.py b/nncf/openvino/quantization/quantize_ifmodel.py index 3fa652bef21..c559846ec29 100644 --- a/nncf/openvino/quantization/quantize_ifmodel.py +++ b/nncf/openvino/quantization/quantize_ifmodel.py @@ -10,7 +10,7 @@ # limitations under the License. 
from itertools import islice -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, cast import openvino.runtime as ov @@ -156,9 +156,13 @@ def apply_algorithm_if_bodies( """ nncf_logger.info(f"Iteration [{current_model_num}/{len(graphs)}] ...") parent_graph = graphs[graph_id] - quantized_model = algorithm.apply( - ModelWrapper(parent_model, parent_graph), parent_statistic_points, parent_dataset - ).model + + model_wrapper = ModelWrapper(parent_model, graph=parent_graph) + quantized_model_wrapper = algorithm.apply( + model_wrapper, statistic_points=parent_statistic_points, dataset=parent_dataset + ) + quantized_model = cast(ov.Model, quantized_model_wrapper.model) + if get_number_if_op(parent_model) == 0: return quantized_model, current_model_num model_transformer_fp32 = factory.ModelTransformerFactory.create(parent_model) diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 24b96e1e6ae..b94a7cbc2c4 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -11,7 +11,7 @@ from copy import deepcopy from pathlib import Path -from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union, cast import openvino.runtime as ov from openvino._offline_transformations import compress_quantize_weights_transformation @@ -170,7 +170,8 @@ def native_quantize_impl( warning_model_no_batchwise_support( model_wrapper.graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS ) - quantized_model = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset).model + quantized_model_wrapper = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset) + quantized_model = cast(ov.Model, quantized_model_wrapper.model) if is_weight_compression_needed(advanced_parameters): compress_quantize_weights_transformation(quantized_model) @@ -385,7 +386,7 @@ def compress_weights_impl( Implementation of the `compress_weights()` method for the OpenVINO backend. 
""" model = remove_friendly_name_duplicates(model) - graph = NNCFGraphFactory.create(model) + model_wrapper = ModelWrapper(model) compression_algorithm = WeightCompression( mode, ratio, @@ -406,18 +407,20 @@ def compress_weights_impl( if advanced_parameters and advanced_parameters.statistics_path: # If there is no such directory, then caches statistics if not Path(advanced_parameters.statistics_path).exists(): - cache_weight_compression_statistics(model, graph, dataset, subset_size, advanced_parameters.statistics_path) + cache_weight_compression_statistics( + model_wrapper.model, model_wrapper.graph, dataset, subset_size, advanced_parameters.statistics_path + ) statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) compression_algorithm.set_backend_entity(model) - _, matmul_input_to_output_nodes_map = compression_algorithm.get_compression_nodes_info(graph) + _, matmul_input_to_output_nodes_map = compression_algorithm.get_compression_nodes_info(model_wrapper.graph) register_statistics_for_algorithm( statistics_aggregator, - model, - graph, + model_wrapper.model, + model_wrapper.graph, compression_algorithm, matmul_input_to_output_nodes_map, ) statistics_aggregator.load_statistics_from_dir(advanced_parameters.statistics_path) statistics_points = statistics_aggregator.statistic_points - return compression_algorithm.apply(model, graph, statistics_points, dataset) + return compression_algorithm.apply(model_wrapper, statistic_points=statistics_points, dataset=dataset).model diff --git a/nncf/quantization/algorithms/algorithm.py b/nncf/quantization/algorithms/algorithm.py index f5fe6896971..86b21694120 100644 --- a/nncf/quantization/algorithms/algorithm.py +++ b/nncf/quantization/algorithms/algorithm.py @@ -38,24 +38,25 @@ def available_backends(self) -> List[BackendType]: @abstractmethod def apply( self, - model: ModelWrapper, + model_wrapper: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: + ) -> ModelWrapper: """ Applies the algorithm to the model. - :param model: Model for applying algorithm. + :param model_wrapper: A wrapper object containing the model to be applied. :param statistic_points: Statistic points with collected statistics values. :param dataset: A representative dataset for the calibration process. :return: A resulting model. """ @abstractmethod - def get_statistic_points(self, model: ModelWrapper) -> StatisticPointsContainer: + def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer: """ Returns statistic points, for which StatisticsCollector should collect statistics. - :param model: Model for statistics collection. + :param model_wrapper: A wrapper object containing the model for statistics collection. :return: Statistic points, for which StatisticsCollector should collect statistics. 
""" diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index fdfba42ace5..d0bea42e41d 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -30,7 +30,6 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import copy_model -from nncf.common.utils.backend import get_backend from nncf.experimental.common.tensor_statistics.statistical_functions import mean_per_channel from nncf.quantization.algorithms.algorithm import Algorithm from nncf.tensor import Tensor @@ -108,13 +107,12 @@ def __init__( def available_backends(self) -> List[BackendType]: return [BackendType.ONNX, BackendType.OPENVINO, BackendType.TORCH_FX] - def _set_backend_entity(self, model: TModel) -> None: + def _set_backend_entity(self, model_backend: BackendType) -> None: """ Creates a helper class with a backed-specific logic of the algorithm. - :param model: Backend-specific input model. + :param model_backend: Backend of a model. """ - model_backend = get_backend(model) if model_backend == BackendType.ONNX: from nncf.quantization.algorithms.bias_correction.onnx_backend import ONNXBiasCorrectionAlgoBackend @@ -135,11 +133,14 @@ def _set_backend_entity(self, model: TModel) -> None: def apply( self, model_wrapper: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: + ) -> ModelWrapper: + self._set_backend_entity(model_wrapper.backend) + model = model_wrapper.model - self._set_backend_entity(model) + main_transformations_layout = TransformationLayout() main_model_transformer = ModelTransformerFactory.create(model) @@ -206,7 +207,8 @@ def apply( # to reduce memory usage during the algorithm's pipeline. 
self._remove_unnecessary_stats(position, subgraphs_data)
- return main_model_transformer.transform(main_transformations_layout)
+ transformed_model = main_model_transformer.transform(main_transformations_layout)
+ return ModelWrapper(transformed_model, attributes=model_wrapper.attributes)
def _is_node_correctable(self, node: NNCFNode, nncf_graph: NNCFGraph) -> bool:
"""
@@ -555,9 +557,9 @@ def output_filter_func(point):
return output_fp
def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer:
- model = model_wrapper.model
- graph = model_wrapper.graph
- self._set_backend_entity(model_wrapper.model)
+ self._set_backend_entity(model_wrapper.backend)
+ model, graph = model_wrapper.unwrap()
+
statistic_container = StatisticPointsContainer()
nodes_with_bias = [
diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py
index 2a0fb0f4a1b..d616e8e77e8 100644
--- a/nncf/quantization/algorithms/channel_alignment/algorithm.py
+++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py
@@ -94,11 +94,12 @@ def _set_backend_entity(self, model: TModel) -> None:
def apply(
self,
- model: TModel,
- graph: NNCFGraph,
+ model_wrapper: ModelWrapper,
+ *,
statistic_points: Optional[StatisticPointsContainer] = None,
dataset: Optional[Dataset] = None,
- ) -> TModel:
+ ) -> ModelWrapper:
+ model, graph = model_wrapper.unwrap()
self._set_backend_entity(model)
model_transformer = ModelTransformerFactory.create(model)
transformation_layout = TransformationLayout()
@@ -128,7 +129,7 @@ def filter_func(point: StatisticPoint) -> bool:
):
nncf_logger.debug(
f"Skipping channel alignment for pairs {conv_in.node_name}, {conv_out.node_name} "
- " because one of the node is 1D MatMul, 1D Matmuls are not supported by CA algortihm yet."
+ "because one of the nodes is a 1D MatMul; 1D MatMuls are not supported by the CA algorithm yet."
)
continue
@@ -171,7 +172,7 @@ def filter_func(point: StatisticPoint) -> bool:
transformation_layout.register(command)
transformed_model = model_transformer.transform(transformation_layout)
- return transformed_model
+ return ModelWrapper(transformed_model, attributes=model_wrapper.attributes)
@staticmethod
def _align_means(
diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py
index 40c8a87a364..0f3f4142fbc 100644
--- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py
+++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py
@@ -26,7 +26,6 @@
from nncf.common.tensor_statistics.statistic_point import StatisticPoint
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.common.utils.backend import BackendType
-from nncf.common.utils.backend import get_backend
from nncf.experimental.common.tensor_statistics.statistical_functions import mean_per_channel
from nncf.quantization.algorithms.algorithm import Algorithm
from nncf.tensor import Tensor
@@ -95,13 +94,12 @@ def __init__(
def available_backends(self) -> List[BackendType]:
return [BackendType.ONNX, BackendType.OPENVINO, BackendType.TORCH, BackendType.TORCH_FX]
- def _set_backend_entity(self, model: TModel) -> None:
+ def _set_backend_entity(self, model_backend: BackendType) -> None:
"""
Creates a helper class with a backed-specific logic of the algorithm.
- :param model: Backend-specific input model.
+ :param model_backend: Backend of a model.
""" - model_backend = get_backend(model) if model_backend == BackendType.ONNX: from nncf.quantization.algorithms.fast_bias_correction.onnx_backend import ONNXFastBiasCorrectionAlgoBackend @@ -130,12 +128,12 @@ def _set_backend_entity(self, model: TModel) -> None: def apply( self, model_wrapper: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> ModelWrapper: - model = model_wrapper.model - graph = model_wrapper.graph - self._set_backend_entity(model) + self._set_backend_entity(model_wrapper.backend) + model, graph = model_wrapper.unwrap() model_transformer = ModelTransformerFactory.create(model) @@ -208,9 +206,7 @@ def apply( transformation_layout.register(self._backend_entity.create_bias_correction_command(node, bias_value, graph)) transformed_model = model_transformer.transform(transformation_layout) - return ModelWrapper( - model=transformed_model, graph=graph, state=model_wrapper.state # BC dows not changed model's graph - ) + return ModelWrapper(transformed_model, attributes=model_wrapper.attributes) @staticmethod def _get_bias_shift_magnitude(current_bias_value: Tensor, updated_bias_value: Tensor) -> Tensor: @@ -349,9 +345,8 @@ def _get_bias_shift( return bias_shift def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer: - model = model_wrapper.model + self._set_backend_entity(model_wrapper.backend) graph = model_wrapper.graph - self._set_backend_entity(model) nodes_with_bias = [ node for node in graph.get_all_nodes() if self._backend_entity.is_node_with_bias(node, graph) ] diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 7eda61ce64a..a40f0637a74 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -109,5 +109,5 @@ def get_node_names_for_input_output_statistics(node: NNCFNode, nncf_graph: NNCFG return input_node_name, output_node_name @staticmethod - def get_activation_channel_axis(node: NNCFNode, pord_id: int, input_shape: Tuple[int]) -> int: + def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple[int]) -> int: return node.metatype.output_channel_axis diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_fx_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_fx_backend.py index c6c538a52b4..cda0b3b30c6 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_fx_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_fx_backend.py @@ -98,5 +98,5 @@ def get_node_names_for_input_output_statistics(node: NNCFNode, nncf_graph: NNCFG return node.node_name, node.node_name @staticmethod - def get_activation_channel_axis(node: NNCFNode, pord_id: int, input_shape: Tuple[int]) -> int: + def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple[int]) -> int: return node.metatype.output_channel_axis diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index b802ea4536e..2b90aeb02f8 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -891,6 +891,7 @@ def _get_quantization_points_overflow_fix( def apply( self, model_wrapper: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> 
ModelWrapper:
@@ -990,7 +991,7 @@ def filter_func(point: StatisticPoint) -> bool:
if not transformation_layout.transformations:
nncf_logger.info("The model has no operations to apply quantization.")
quantized_model = model_transformer.transform(transformation_layout)
- return ModelWrapper(quantized_model, state=model_wrapper.state)
+ return ModelWrapper(quantized_model, attributes=model_wrapper.attributes)
def get_statistic_points(self, model_wrapper: ModelWrapper) -> StatisticPointsContainer:
self._set_backend_entity(model_wrapper.model)
diff --git a/nncf/quantization/algorithms/pipeline.py b/nncf/quantization/algorithms/pipeline.py
index ae9d4276a3c..1f659e14125 100644
--- a/nncf/quantization/algorithms/pipeline.py
+++ b/nncf/quantization/algorithms/pipeline.py
@@ -26,14 +26,14 @@
def collect_statistics(
containers: Union[StatisticPointsContainer, List[StatisticPointsContainer]],
- model_state: ModelWrapper,
+ model_wrapper: ModelWrapper,
dataset: Dataset,
) -> StatisticPointsContainer:
"""
Utility method for collecting statistics by model.
:param statistic_points: Statistic points that need to be collected.
- :param model: A model.
+ :param model_wrapper: A wrapper object containing the model.
:param graph: A graph associated with a model.
:param dataset: A dataset.
:return: Collected statistics.
@@ -41,10 +41,10 @@
if not isinstance(containers, list):
containers = [containers]
- statistics_aggregator = StatisticsAggregatorFactory.create(model_state.model, dataset)
+ statistics_aggregator = StatisticsAggregatorFactory.create(model_wrapper.model, dataset)
for container in containers:
statistics_aggregator.register_statistic_points(container)
- statistics_aggregator.collect_statistics(model_state.model, model_state.graph)
+ statistics_aggregator.collect_statistics(model_wrapper.model, model_wrapper.graph)
return statistics_aggregator.statistic_points
@@ -107,10 +107,10 @@ def run_step(
"""
current_model = model_wrapper
- pipeline_steps = self._remove_unsupported_algorithms(get_backend(model_wrapper.model))
+ pipeline_steps = self._remove_unsupported_algorithms(model_wrapper.backend)
pipeline_step = pipeline_steps[step_index]
for algorithm in pipeline_step:
- current_model = algorithm.apply(current_model, step_statistics)
+ current_model = algorithm.apply(current_model, statistic_points=step_statistics)
return current_model
def run_from_step(
diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py
index 9fd02014770..80879f361c5 100644
--- a/nncf/quantization/algorithms/post_training/algorithm.py
+++ b/nncf/quantization/algorithms/post_training/algorithm.py
@@ -10,10 +10,11 @@
# limitations under the License.
import itertools
-from typing import Callable, List, Optional, TypeVar
+from typing import List, Optional, TypeVar
from nncf import Dataset
from nncf.common.graph.graph import NNCFGraph
+from nncf.common.model import ModelWrapper
from nncf.common.quantization.structs import QuantizationPreset
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.common.utils.backend import BackendType
@@ -26,7 +27,6 @@
from nncf.scopes import IgnoredScope
TModel = TypeVar("TModel")
-TPass = Callable[[TModel], TModel]
class PostTrainingQuantization(Algorithm):
@@ -55,7 +55,7 @@ def __init__(
- `performance`: Symmetric quantization of weights and activations.
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
Default value is None.
In this case, `mixed` preset is used for `transformer` - model type otherwise `performace`. + model type otherwise `performance`. :param target_device: A target device the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. @@ -94,10 +94,11 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin def apply( self, - model: TModel, + model: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: + ) -> ModelWrapper: if dataset is None and len(self._pipeline.pipeline_steps) > 1: raise ValueError( "A dataset is required for the post-training quantization " diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 77ab1d8ed9b..a23ebf3b8ef 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -100,6 +100,7 @@ def _set_backend_entity(self, model: TModel) -> None: def apply( self, model_wrapper: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> ModelWrapper: @@ -178,7 +179,7 @@ def apply( transformation_layout.register(scale_insertion_command) transformed_model = model_transformer.transform(transformation_layout) - return ModelWrapper(model=transformed_model, state=model_wrapper.state) + return ModelWrapper(transformed_model, attributes=model_wrapper.attributes) @staticmethod def _calculate_scale_and_ratio( diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index c5a4e2d221c..6f016326fa8 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -24,6 +24,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track +from nncf.common.model import ModelWrapper from nncf.common.scopes import should_consider_scope from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer @@ -480,11 +481,12 @@ def _get_ignored_scope_weight_statistics(self, model: TModel, graph: NNCFGraph) def apply( self, - model: TModel, - graph: NNCFGraph, + model_wrapper: ModelWrapper, + *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: + ) -> ModelWrapper: + model, graph = model_wrapper.unwrap() self.set_backend_entity(model) nodes_to_compress = self.get_nodes_to_compress(graph) @@ -667,7 +669,7 @@ def apply( }, algo_name="weight_compression", ) - return transformed_model + return ModelWrapper(transformed_model, attributes=model_wrapper.attributes) def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]: """ diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index eb520bfcd1b..b039919aa5c 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -67,7 +67,7 @@ def warning_model_no_batchwise_support( :param graph: Model's NNCFGraph. :param advanced_quantization_parameters: AdvancedQuantizationParameters. :param model_type: Model type algorithm option. 
- :param no_batchwise_support_metatypes: Meatypes having no batchwise statistics support. + :param no_batchwise_support_metatypes: Metatypes having no batchwise statistics support. """ if is_model_no_batchwise_support( graph, advanced_quantization_parameters, model_type, no_batchwise_support_metatypes @@ -87,7 +87,7 @@ def is_model_no_batchwise_support( :param graph: Model's NNCFGraph. :param advanced_quantization_parameters: AdvancedQuantizationParameters. :param model_type: Model type algorithm option. - :param no_batchwise_support_metatypes: Meatypes having no batchwise statistics support. + :param no_batchwise_support_metatypes: Metatypes having no batchwise statistics support. """ return ( advanced_quantization_parameters diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index fb3ee792730..2b260cf3041 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -10,12 +10,11 @@ # limitations under the License. from copy import deepcopy -from typing import Optional +from typing import Optional, cast import torch import nncf -from nncf.common.factory import NNCFGraphFactory from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset @@ -79,7 +78,9 @@ def quantize_impl( model_wrapper.graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS ) - quantized_model: NNCFNetwork = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset).model + quantized_model_wrapper = quantization_algorithm.apply(model_wrapper, dataset=calibration_dataset) + quantized_model = cast(NNCFNetwork, quantized_model_wrapper.model) + quantized_model.nncf.disable_dynamic_graph_building() return quantized_model @@ -120,5 +121,5 @@ def compress_weights_impl( backup_mode, advanced_parameters, ) - graph = NNCFGraphFactory.create(model) - return compression_algorithm.apply(model, graph, dataset=dataset) + model_wrapper = ModelWrapper(model) + return compression_algorithm.apply(model_wrapper, dataset=dataset).model diff --git a/tests/cross_fw/test_templates/test_bias_correction.py b/tests/cross_fw/test_templates/test_bias_correction.py index 6fe670c7496..bfdbc2a13c9 100644 --- a/tests/cross_fw/test_templates/test_bias_correction.py +++ b/tests/cross_fw/test_templates/test_bias_correction.py @@ -14,7 +14,6 @@ import pytest -from nncf.common.factory import NNCFGraphFactory from nncf.common.model import ModelWrapper from nncf.data import Dataset from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -82,7 +81,7 @@ def backend_specific_model(model: TModel, tmp_dir: str) -> TModel: @staticmethod @abstractmethod - def check_bias(model: TModel, ref_biases: Dict) -> None: + def check_bias(model_wrapper: ModelWrapper, ref_biases: Dict) -> None: """ Checks biases values. 
""" @@ -150,16 +149,17 @@ def test_update_bias(self, model_cls, ref_biases, tmpdir): dataset = Dataset(self.get_dataset(model_cls.INPUT_SIZE), self.get_transform_fn()) quantization_algorithm = self.get_quantization_algorithm() - quantized_model = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) + quantized_model_wrapper = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) mapped_ref_biases = self.map_references(ref_biases, model_cls) - self.check_bias(quantized_model, mapped_ref_biases) + self.check_bias(quantized_model_wrapper, mapped_ref_biases) def test__get_subgraph_data_for_node(self, quantized_test_model, layer_name, ref_data): - nncf_graph = NNCFGraphFactory.create(quantized_test_model) + model_wrapper = ModelWrapper(quantized_test_model) + nncf_graph = model_wrapper.graph bc_algo = self.get_bias_correction_algorithm() - bc_algo._set_backend_entity(quantized_test_model) + bc_algo._set_backend_entity(model_wrapper.backend) node = nncf_graph.get_node_by_name(layer_name) bc_algo._collected_stat_inputs_map.update(ref_data["collected_inputs"]) diff --git a/tests/cross_fw/test_templates/test_channel_alignment.py b/tests/cross_fw/test_templates/test_channel_alignment.py index 373fe356802..ab9d330ec21 100644 --- a/tests/cross_fw/test_templates/test_channel_alignment.py +++ b/tests/cross_fw/test_templates/test_channel_alignment.py @@ -416,7 +416,9 @@ def dims_iter(*args, **kwargs): ref_bias_in_after_scale_align, ) ) - algorithm.apply(None, nncf_graph, statistic_points) + + mocker.patch("nncf.common.model.get_backend", return_value=None) + algorithm.apply(ModelWrapper(None, graph=nncf_graph), statistic_points=statistic_points) if empty_statistics or one_dim_mm: assert algorithm._align_means.call_count == 0 @@ -512,7 +514,8 @@ class MockBackend(backend_cls): MockBackend.get_statistic_collector = mocker.MagicMock(return_value=ref_stat_collector) algorithm._backend_entity = MockBackend - statistic_container = algorithm.get_statistic_points(ModelWrapper(None, nncf_graph)) + mocker.patch("nncf.common.model.get_backend", return_value=None) + statistic_container = algorithm.get_statistic_points(ModelWrapper(None, graph=nncf_graph)) backend_cls = self.get_backend_cls() target_node_name = "/Add_1_0" if num_biases else "/Conv_1_0" diff --git a/tests/cross_fw/test_templates/test_fast_bias_correction.py b/tests/cross_fw/test_templates/test_fast_bias_correction.py index 22c91654c11..873226f9a54 100644 --- a/tests/cross_fw/test_templates/test_fast_bias_correction.py +++ b/tests/cross_fw/test_templates/test_fast_bias_correction.py @@ -115,6 +115,6 @@ def test_update_bias(self, model_cls, ref_bias, tmpdir): dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) quantization_algorithm = self.get_quantization_algorithm() - quantized_model = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) + quantized_model_wrapper = quantization_algorithm.apply(ModelWrapper(model), dataset=dataset) - self.check_bias(quantized_model.model, ref_bias) + self.check_bias(quantized_model_wrapper, ref_bias) diff --git a/tests/cross_fw/test_templates/test_ptq_params.py b/tests/cross_fw/test_templates/test_ptq_params.py index d8989a36d94..6d2039b6d7f 100644 --- a/tests/cross_fw/test_templates/test_ptq_params.py +++ b/tests/cross_fw/test_templates/test_ptq_params.py @@ -204,7 +204,7 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == 
range_estimator_params params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(ModelWrapper(params["model"], params["nncf_graph"])) + stat_points = min_max_algo.get_statistic_points(ModelWrapper(params["model"], graph=params["nncf_graph"])) assert len(stat_points) == params["stat_points_num"] for _, stat_point in stat_points.items(): @@ -375,7 +375,10 @@ def test_unified_scales_command_creation(self, mocker): Tensor(self.get_backend_tensor(idx - 1)), Tensor(self.get_backend_tensor(idx + 2)) ) stats.add_statistic_point(StatisticPoint(tp, tc, algo._algorithm_key)) - algo.apply(ModelWrapper(model, model.nncf_graph), stats) + + mocker.patch("nncf.common.model.get_backend", return_value=None) + model_wrapper = ModelWrapper(model, graph=model.nncf_graph) + algo.apply(model_wrapper, statistic_points=stats) mock_transformer.transform.assert_called_once() layout = mock_transformer.transform.call_args.args[0] self.check_unified_scale_layout(layout, unified_scales_group) @@ -425,4 +428,4 @@ def test_empty_statistics(self, mode, mocker): return_value=mocker.MagicMock(), ) with pytest.raises(nncf.InternalError, match="Statistics were not collected for the node A"): - algo.apply(mocker.MagicMock(), stat_points) + algo.apply(mocker.MagicMock(), statistic_points=stat_points) diff --git a/tests/cross_fw/test_templates/test_smooth_quant.py b/tests/cross_fw/test_templates/test_smooth_quant.py index 69ab9096758..83336a57701 100644 --- a/tests/cross_fw/test_templates/test_smooth_quant.py +++ b/tests/cross_fw/test_templates/test_smooth_quant.py @@ -244,9 +244,10 @@ def test_empty_stats(self, mocker, tmpdir): model = self.backend_specific_model(model_cls(), tmpdir) dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) - graph = NNCFGraphFactory.create(model) + model_wrapper = ModelWrapper(model) + graph = model_wrapper.graph algo = SmoothQuant(subset_size=1, inplace_statistics=False) - algo_statistic_points = algo.get_statistic_points(ModelWrapper(model)) + algo_statistic_points = algo.get_statistic_points(model_wrapper) statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) statistics_aggregator.register_statistic_points(algo_statistic_points) statistics_aggregator.collect_statistics(model, graph) @@ -260,7 +261,8 @@ def test_empty_stats(self, mocker, tmpdir): mocked_transformer = mocker.MagicMock() mocker.patch("nncf.common.factory.ModelTransformerFactory.create", return_value=mocked_transformer) - algo.apply(ModelWrapper(model), algo_statistic_points) + mocker.patch("nncf.common.model.get_backend", return_value=None) + algo.apply(model_wrapper, statistic_points=algo_statistic_points) mocked_transformer.transform.assert_called_once() arg = mocked_transformer.transform.call_args.args[0] diff --git a/tests/onnx/quantization/test_bias_correction.py b/tests/onnx/quantization/test_bias_correction.py index 8ffe20c6102..5c4187d929e 100644 --- a/tests/onnx/quantization/test_bias_correction.py +++ b/tests/onnx/quantization/test_bias_correction.py @@ -16,7 +16,7 @@ import pytest import torch -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.onnx.graph.model_utils import remove_fq_from_inputs from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.onnx.graph.node_utils import get_bias_value @@ -75,8 +75,8 @@ def compare_nncf_graphs(model: onnx.ModelProto, ref_path: str) -> None: return compare_nncf_graph(model, ref_path) @staticmethod - def 
check_bias(model: onnx.ModelProto, ref_biases: Dict) -> None: - nncf_graph = NNCFGraphFactory.create(model) + def check_bias(model_wrapper: ModelWrapper, ref_biases: Dict) -> None: + model, nncf_graph = model_wrapper.unwrap() for ref_name, ref_value in ref_biases.items(): node = nncf_graph.get_node_by_name(ref_name) ref_value = np.array(ref_value) diff --git a/tests/onnx/quantization/test_fast_bias_correction.py b/tests/onnx/quantization/test_fast_bias_correction.py index 0ed364ecb80..9dfc92c170f 100644 --- a/tests/onnx/quantization/test_fast_bias_correction.py +++ b/tests/onnx/quantization/test_fast_bias_correction.py @@ -15,7 +15,7 @@ import onnx import torch -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.onnx.graph.node_utils import get_bias_value from nncf.onnx.graph.node_utils import is_node_with_bias from nncf.quantization.algorithms.fast_bias_correction.onnx_backend import ONNXFastBiasCorrectionAlgoBackend @@ -58,9 +58,9 @@ def transform_fn(data_item): return transform_fn @staticmethod - def check_bias(model: onnx.ModelProto, ref_bias: list): + def check_bias(model_wrapper: ModelWrapper, ref_bias: list): ref_bias = np.array(ref_bias) - nncf_graph = NNCFGraphFactory.create(model) + model, nncf_graph = model_wrapper.unwrap() for node in nncf_graph.get_all_nodes(): if not is_node_with_bias(node): continue diff --git a/tests/openvino/native/quantization/test_fq_params_calculation.py b/tests/openvino/native/quantization/test_fq_params_calculation.py index e1f0d4f8793..67a024a1fa4 100644 --- a/tests/openvino/native/quantization/test_fq_params_calculation.py +++ b/tests/openvino/native/quantization/test_fq_params_calculation.py @@ -17,7 +17,6 @@ from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset -from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.parameters import QuantizationMode from nncf.quantization.advanced_parameters import OverflowFix @@ -67,14 +66,14 @@ def get_fq_nodes_stats_algo(model): def quantize_model(ov_model, q_params): dataset = get_dataset_for_test(ov_model) - graph = GraphConverter.create_nncf_graph(ov_model) + model_wrapper = ModelWrapper(ov_model) min_max_algo = MinMaxQuantization(subset_size=1, **q_params) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = min_max_algo.get_statistic_points(ModelWrapper(ov_model, graph)) + statistic_points = min_max_algo.get_statistic_points(model_wrapper) statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(ov_model, graph) - quantized_model = min_max_algo.apply(ModelWrapper(ov_model, graph), statistics_aggregator.statistic_points) + statistics_aggregator.collect_statistics(model_wrapper.model, model_wrapper.graph) + quantized_model = min_max_algo.apply(model_wrapper, statistic_points=statistics_aggregator.statistic_points) return quantized_model.model diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 2352a009f27..9ddf2e0e767 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -20,7 +20,6 @@ from nncf import Dataset from nncf.common.model import ModelWrapper from nncf.common.quantization.structs import QuantizationPreset -from nncf.openvino.graph.nncf_graph_builder import GraphConverter from 
nncf.openvino.quantization.quantize_model import quantize_impl from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.parameters import ModelType @@ -134,15 +133,15 @@ def test_real_models_sq_placement(model_name_params, tmp_path): def smooth_quant_model(ov_model: ov.Model, q_params: Dict, quantize=True): dataset = get_dataset_for_test(ov_model) - graph = GraphConverter.create_nncf_graph(ov_model) + model_wrapper = ModelWrapper(ov_model) smooth_quant_algo = SmoothQuant(subset_size=1) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = smooth_quant_algo.get_statistic_points(ModelWrapper(ov_model, graph)) + statistic_points = smooth_quant_algo.get_statistic_points(model_wrapper) statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(ov_model, graph) + statistics_aggregator.collect_statistics(model_wrapper.model, model_wrapper.graph) modified_model = smooth_quant_algo.apply( - ModelWrapper(ov_model, graph), statistics_aggregator.statistic_points + model_wrapper, statistic_points=statistics_aggregator.statistic_points ).model if quantize: diff --git a/tests/openvino/native/test_bias_correction.py b/tests/openvino/native/test_bias_correction.py index 711c831facd..b8af94b31fe 100644 --- a/tests/openvino/native/test_bias_correction.py +++ b/tests/openvino/native/test_bias_correction.py @@ -16,7 +16,7 @@ import pytest import torch -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.openvino.graph.model_utils import remove_fq_from_inputs from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.graph.node_utils import get_bias_value @@ -79,8 +79,8 @@ def compare_nncf_graphs(model: ov.Model, ref_path: str) -> None: return compare_nncf_graphs(model, ref_path) @staticmethod - def check_bias(model: ov.Model, ref_biases: Dict) -> None: - nncf_graph = NNCFGraphFactory.create(model) + def check_bias(model_wrapper: ModelWrapper, ref_biases: Dict) -> None: + model, nncf_graph = model_wrapper.unwrap() for ref_name, ref_value in ref_biases.items(): node = nncf_graph.get_node_by_name(ref_name) ref_value = np.array(ref_value) diff --git a/tests/openvino/native/test_fast_bias_correction.py b/tests/openvino/native/test_fast_bias_correction.py index 6de9523bead..9c51f334d81 100644 --- a/tests/openvino/native/test_fast_bias_correction.py +++ b/tests/openvino/native/test_fast_bias_correction.py @@ -15,7 +15,7 @@ import openvino as ov import torch -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.common.utils.os import is_macos from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import is_node_with_bias @@ -52,9 +52,9 @@ def transform_fn(data_item): return transform_fn @staticmethod - def check_bias(model: ov.Model, ref_bias: list): + def check_bias(model_wrapper: ModelWrapper, ref_bias: list): ref_bias = np.array(ref_bias) - nncf_graph = NNCFGraphFactory.create(model) + model, nncf_graph = model_wrapper.unwrap() atol = 0.0001 if not is_macos() else 0.01 diff --git a/tests/torch/fx/test_bias_correction.py b/tests/torch/fx/test_bias_correction.py index 06db212fe79..34d5edf0b21 100644 --- a/tests/torch/fx/test_bias_correction.py +++ b/tests/torch/fx/test_bias_correction.py @@ -13,11 +13,10 @@ from typing import Any, Dict, List import numpy as np -import openvino as ov import pytest import torch.fx -from nncf.common.factory import 
NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.experimental.torch.fx.model_utils import remove_fq_from_inputs from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter from nncf.experimental.torch.fx.node_utils import get_bias_value @@ -77,8 +76,8 @@ def remove_fq_from_inputs(model: torch.fx.GraphModule) -> torch.fx.GraphModule: return remove_fq_from_inputs(model, graph) @staticmethod - def check_bias(model: ov.Model, ref_biases: Dict) -> None: - nncf_graph = NNCFGraphFactory.create(model) + def check_bias(model_wrapper: ModelWrapper, ref_biases: Dict) -> None: + model, nncf_graph = model_wrapper.unwrap() for ref_name, ref_value in ref_biases.items(): node = nncf_graph.get_node_by_name(ref_name) ref_value = torch.tensor(ref_value) diff --git a/tests/torch/fx/test_compress_weights.py b/tests/torch/fx/test_compress_weights.py index 0de35aef29e..4ea029ba5cd 100644 --- a/tests/torch/fx/test_compress_weights.py +++ b/tests/torch/fx/test_compress_weights.py @@ -13,6 +13,7 @@ import pytest import torch +from torch.fx import GraphModule import nncf from nncf import BackupMode @@ -52,7 +53,7 @@ def get_model_size(model): def get_compressed_modules_weights( - compressed_model: torch.fx.GraphModule, dtype: torch.dtype, compressed_node_weight_port: Dict[str, int] + compressed_model: GraphModule, dtype: torch.dtype, compressed_node_weight_port: Dict[str, int] ): n_target_modules = 0 n_compressed_weights = 0 diff --git a/tests/torch/fx/test_fast_bias_correction.py b/tests/torch/fx/test_fast_bias_correction.py index 8c94a32cafd..16da7d24003 100644 --- a/tests/torch/fx/test_fast_bias_correction.py +++ b/tests/torch/fx/test_fast_bias_correction.py @@ -15,7 +15,7 @@ import torch import torch.fx -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.quantization.algorithms.fast_bias_correction.torch_fx_backend import FXFastBiasCorrectionAlgoBackend from nncf.torch.model_graph_manager import OPERATORS_WITH_BIAS_METATYPES from tests.cross_fw.test_templates.test_fast_bias_correction import TemplateTestFBCAlgorithm @@ -49,9 +49,9 @@ def transform_fn(data_item): return transform_fn @staticmethod - def check_bias(model: torch.fx.GraphModule, ref_bias: list): + def check_bias(model_wrapper: ModelWrapper, ref_bias: list): + model, nncf_graph = model_wrapper.unwrap() ref_bias = torch.Tensor(ref_bias) - nncf_graph = NNCFGraphFactory.create(model) for node in nncf_graph.get_all_nodes(): if node.metatype not in OPERATORS_WITH_BIAS_METATYPES: continue @@ -77,7 +77,3 @@ def backend_specific_model(model: bool, tmp_dir: str): @staticmethod def fn_to_type(tensor): return torch.Tensor(tensor).cuda() - - @staticmethod - def check_bias(model: torch.fx.GraphModule, ref_bias: list): - TestTorchFXFBCAlgorithm.check_bias(model, ref_bias) diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 00f82f0e538..06cbfd32120 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -318,7 +318,9 @@ def test_quantizer_parameters_export(tmp_path: Path, _seed): statistic_points = min_max_algo.get_statistic_points(ModelWrapper(nncf_network)) statistics_aggregator.register_statistic_points(statistic_points) statistics_aggregator.collect_statistics(model, nncf_network.nncf.get_graph()) - torch_quantized_model = min_max_algo.apply(ModelWrapper(nncf_network), statistics_aggregator.statistic_points) + torch_quantized_model 
= min_max_algo.apply( + ModelWrapper(nncf_network), statistic_points=statistics_aggregator.statistic_points + ) path = str(tmp_path / "torch_ptq_model.onnx") torch.onnx.export( diff --git a/tests/torch/ptq/test_fast_bias_correction.py b/tests/torch/ptq/test_fast_bias_correction.py index 5b3c6c0ce3c..04c0af32a7f 100644 --- a/tests/torch/ptq/test_fast_bias_correction.py +++ b/tests/torch/ptq/test_fast_bias_correction.py @@ -14,11 +14,10 @@ import pytest import torch -from nncf.common.factory import NNCFGraphFactory +from nncf.common.model import ModelWrapper from nncf.quantization.algorithms.fast_bias_correction.torch_backend import PTFastBiasCorrectionAlgoBackend from nncf.torch.model_graph_manager import get_fused_bias_value from nncf.torch.model_graph_manager import is_node_with_fused_bias -from nncf.torch.nncf_network import NNCFNetwork from tests.cross_fw.test_templates.test_fast_bias_correction import TemplateTestFBCAlgorithm from tests.torch.ptq.helpers import get_nncf_network @@ -49,9 +48,9 @@ def transform_fn(data_item): return transform_fn @staticmethod - def check_bias(model: NNCFNetwork, ref_bias: list): + def check_bias(model_wrapper: ModelWrapper, ref_bias: list): ref_bias = torch.Tensor(ref_bias) - nncf_graph = NNCFGraphFactory.create(model) + model, nncf_graph = model_wrapper.unwrap() for node in nncf_graph.get_all_nodes(): if not is_node_with_fused_bias(node, nncf_graph): continue @@ -78,9 +77,9 @@ def fn_to_type(tensor): return torch.Tensor(tensor).cuda() @staticmethod - def check_bias(model: NNCFNetwork, ref_bias: list): + def check_bias(model_wrapper: ModelWrapper, ref_bias: list): + model, nncf_graph = model_wrapper.unwrap() ref_bias = torch.Tensor(ref_bias) - nncf_graph = NNCFGraphFactory.create(model) for node in nncf_graph.get_all_nodes(): if not is_node_with_fused_bias(node, nncf_graph): continue From acdaa7cc21871b193603767e666769c25b6787ae Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Tue, 17 Dec 2024 05:14:52 +0200 Subject: [PATCH 3/3] f --- nncf/quantization/algorithms/pipeline.py | 14 +++++++------- .../algorithms/post_training/algorithm.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nncf/quantization/algorithms/pipeline.py b/nncf/quantization/algorithms/pipeline.py index 1f659e14125..0159955ae0c 100644 --- a/nncf/quantization/algorithms/pipeline.py +++ b/nncf/quantization/algorithms/pipeline.py @@ -115,7 +115,7 @@ def run_step( def run_from_step( self, - model: ModelWrapper, + model_wrapper: ModelWrapper, dataset: Dataset, start_step_index: int = 0, step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None, @@ -134,23 +134,23 @@ def run_from_step( :return: The updated model after executing the pipeline from the specified pipeline step to the end. 
""" - pipeline_steps = self._remove_unsupported_algorithms(get_backend(model.model)) + pipeline_steps = self._remove_unsupported_algorithms(model_wrapper.backend) if step_index_to_statistics is None: step_index_to_statistics = {} # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step - step_model = model + step_model_wrapper = model_wrapper for step_index in range(start_step_index, len(pipeline_steps)): # Collect statistics required to run current pipeline step step_statistics = step_index_to_statistics.get(step_index) if step_statistics is None: - statistic_points = self.get_statistic_points_for_step(step_index, step_model) - step_statistics = collect_statistics(statistic_points, step_model, dataset) + statistic_points = self.get_statistic_points_for_step(step_index, step_model_wrapper) + step_statistics = collect_statistics(statistic_points, step_model_wrapper, dataset) # Run current pipeline step - step_model = self.run_step(step_index, step_statistics, step_model) + step_model_wrapper = self.run_step(step_index, step_statistics, step_model_wrapper) - return step_model + return step_model_wrapper def get_statistic_points_for_step(self, step_index: int, model_wrapper: ModelWrapper) -> StatisticPointsContainer: """ diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index 80879f361c5..9466688a0d7 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -94,7 +94,7 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin def apply( self, - model: ModelWrapper, + model_wrapper: ModelWrapper, *, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, @@ -109,4 +109,4 @@ def apply( if statistic_points: step_index_to_statistics = {0: statistic_points} - return self._pipeline.run_from_step(model, dataset, 0, step_index_to_statistics) + return self._pipeline.run_from_step(model_wrapper, dataset, 0, step_index_to_statistics)