
Commit 3f3b2fd

Merge branch 'kaihui/ar_v02_3x' of https://github.com/intel/neural-compressor into kaihui/ar_v02_3x

Kaihui-intel committed May 23, 2024
2 parents 6be70fa + 78af942
Showing 2 changed files with 7 additions and 7 deletions.
12 changes: 6 additions & 6 deletions neural_compressor/torch/algorithms/weight_only/autoround.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import json
import time
import copy
from typing import Union

import torch
@@ -135,6 +135,7 @@ def pack_model(
        set_module(compressed_model, k, new_module)
    return compressed_model


class InputCaptureModule(torch.nn.Module):

    def __init__(self, model) -> None:
@@ -154,6 +155,7 @@ def forward(self, *args, **kwargs):
            logger.error("Handle cases where input data is neither a Tensor nor a dict")
        return self.orig_model.forward(*args, **kwargs)


class AutoRoundQuantizer(Quantizer):
    def __init__(
        self,
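
For orientation, here is a minimal, self-contained reconstruction of what InputCaptureModule plausibly looks like, pieced together from the __init__ and forward fragments visible in this diff; the captured_inputs attribute name and the exact branch conditions are assumptions, not the repository's verified code:

import logging

import torch

logger = logging.getLogger(__name__)


class InputCaptureModule(torch.nn.Module):
    """Wraps a model and records the calibration inputs passed to forward."""

    def __init__(self, model) -> None:
        super().__init__()
        self.captured_inputs = []  # assumption: name of the cache attribute
        self.orig_model = model

    def forward(self, *args, **kwargs):
        if args and isinstance(args[0], torch.Tensor):
            self.captured_inputs.append(args[0])  # plain tensor batch
        elif kwargs:
            self.captured_inputs.append(kwargs)  # dict-style inputs (e.g. input_ids, attention_mask)
        else:
            logger.error("Handle cases where input data is neither a Tensor nor a dict")
        return self.orig_model.forward(*args, **kwargs)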
@@ -280,7 +282,7 @@ def prepare(self, model: torch.nn.Module, *args, **kwargs):
            data_type=self.data_type,
            scale_dtype=self.scale_dtype,
        )

        self.rounder.prepare()
        prepare_model = InputCaptureModule(model)
        return prepare_model
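
The prepare hunk above shows the quantizer handing back an InputCaptureModule rather than the original model. A rough sketch of how that flow would be driven, with the constructor arguments and the convert call hedged as assumptions about this in-progress API:

# Hypothetical calibration flow; AutoRoundQuantizer's exact signatures are assumptions.
quantizer = AutoRoundQuantizer(quant_config=quant_config)
capture_model = quantizer.prepare(model)       # wraps model in InputCaptureModule
for batch in calib_dataloader:                 # each forward pass records its inputs
    capture_model(batch)
q_model = quantizer.convert(capture_model)     # consumes the cached inputs and packs weights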
@@ -384,7 +386,7 @@ def get_autoround_default_run_fn(


class AutoRoundProcessor(AutoRound):

    @torch.no_grad()
    def cache_inter_data(self, block_names, n_samples, layer_names=[], last_cache_name=None):
        """Save the inputs of block_name for calibration. For layers, we cache both inputs and outputs.
@@ -431,8 +433,7 @@ def cache_inter_data(self, block_names, n_samples, layer_names=[], last_cache_name=None):
        self.model = self.model.to(tmp_dtype)

        return res



    @torch.no_grad()
    def prepare(self):
        """Prepares a given model for quantization."""
@@ -512,7 +513,6 @@ def convert(self):
        self.model = self.model.to("cpu")

        all_inputs = res


        del self.inputs
        inputs = all_inputs[self.block_names[0]]
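The cache_inter_data docstring above describes recording the inputs each block sees during calibration. A self-contained sketch of that general technique using standard PyTorch forward pre-hooks; this illustrates the idea only and is not the AutoRoundProcessor implementation:

import torch


def cache_block_inputs(model, block_names, calib_batches):
    """Record the positional tensor inputs seen by each named block."""
    cache = {name: [] for name in block_names}
    handles = []

    def make_hook(key):
        def hook(module, inputs):  # pre-hook: inputs is the tuple of positional args
            cache[key].append(tuple(x.detach() for x in inputs if torch.is_tensor(x)))
        return hook

    for name in block_names:
        handles.append(model.get_submodule(name).register_forward_pre_hook(make_hook(name)))
    with torch.no_grad():
        for batch in calib_batches:  # calibration forward passes fill the cache
            model(batch)
    for h in handles:
        h.remove()
    return cache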
2 changes: 1 addition & 1 deletion test/3x/torch/quantization/weight_only/test_autoround.py
@@ -63,7 +63,7 @@ def test_autoround(self, quant_lm_head):

        # prepare + convert API
        model = prepare(model=fp32_model, quant_config=quant_config)

        run_fn(model, *run_args)
        q_model = convert(model)
        out = q_model(self.inp)[0]
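
For readers unfamiliar with the 3.x API exercised here, a fleshed-out sketch of this test's prepare/convert sequence; the import path follows the repository's 3.x layout, while fp32_model, run_fn, run_args, and inp stand in for fixtures not shown in this hunk, and the AutoRoundConfig defaults are an assumption:

# Sketch of the test flow under the assumptions above.
from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare

quant_config = AutoRoundConfig()                               # weight-only AutoRound settings
model = prepare(model=fp32_model, quant_config=quant_config)   # returns the input-capture wrapper
run_fn(model, *run_args)                                       # calibration forward passes
q_model = convert(model)                                       # quantize using the captured inputs
out = q_model(inp)[0]                                          # inp: the test's sample input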
