Commit 7f5443c: Fix Issue #112 (#126)

horheynm authored Aug 9, 2024
1 parent d655ca4 commit 7f5443c
Showing 3 changed files with 56 additions and 6 deletions.
9 changes: 5 additions & 4 deletions src/compressed_tensors/quantization/lifecycle/apply.py
@@ -14,7 +14,7 @@

 import logging
 import re
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from copy import deepcopy
 from typing import Dict, Iterable, List, Optional
 from typing import OrderedDict as OrderedDictType

@@ -125,13 +125,14 @@ def apply_quantization_config(model: Module, config: QuantizationConfig) -> Dict
             target_to_scheme[target] = scheme

     # list of submodules to ignore
-    ignored_submodules = []
+    ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in iter_named_leaf_modules(model):
         # potentially fix module name to remove FSDP wrapper prefix
         name = fix_fsdp_module_name(name)
-        if find_name_or_class_matches(name, submodule, config.ignore):
-            ignored_submodules.append(name)
+        if matches := find_name_or_class_matches(name, submodule, config.ignore):
+            for match in matches:
+                ignored_submodules[match].append(name)
             continue  # layer matches ignore list, continue
         targets = find_name_or_class_matches(name, submodule, target_to_scheme)
         if targets:
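
Note on the hunk above: keying ignored_submodules by the matched ignore entry (instead of keeping a flat list of module names) lets the caller tell which entries in config.ignore never matched anything. The remainder of apply_quantization_config is collapsed in this diff, so the follow-up is only sketched here; the helper name and warning text below are illustrative, not the repository's exact code:

import logging
from typing import Dict, Iterable, List

_LOGGER = logging.getLogger(__name__)


def _warn_on_unmatched_ignores(
    config_ignore: Iterable[str], ignored_submodules: Dict[str, List[str]]
) -> None:
    # Illustrative only: entries in config.ignore that matched no submodule
    # usually indicate a typo in the quantization config, so surface a warning.
    unmatched = [entry for entry in config_ignore if entry not in ignored_submodules]
    if unmatched:
        _LOGGER.warning(
            "Some entries in the quantization config ignore list did not match "
            f"any submodule in the model: {unmatched}"
        )
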
4 changes: 2 additions & 2 deletions src/compressed_tensors/quantization/lifecycle/calibration.py
@@ -36,8 +36,8 @@ def set_module_for_calibration(module: Module, quantize_weights_upfront: bool =
     apply to full model with `model.apply(set_module_for_calibration)`

     :param module: module to set for calibration
-    :param quantize_weights_upfront: whether to automatically run weight quantization at the
-        start of calibration
+    :param quantize_weights_upfront: whether to automatically
+        run weight quantization at the start of calibration
     """
     if not getattr(module, "quantization_scheme", None):
         # no quantization scheme nothing to do
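
For context, a small usage sketch of the function whose docstring is reworded above. The import path is inferred from the file path in this diff; the toy model below carries no quantization_scheme attribute, so each call returns early through the guard shown in the last context lines:

from functools import partial

import torch
from compressed_tensors.quantization.lifecycle.calibration import (
    set_module_for_calibration,
)

# Toy model for illustration only: its modules have no `quantization_scheme`,
# so set_module_for_calibration is a no-op here. On a model prepared by
# apply_quantization_config it puts each quantized module into calibration
# mode, optionally quantizing weights up front.
model = torch.nn.Sequential(torch.nn.Linear(8, 8))
model.apply(partial(set_module_for_calibration, quantize_weights_upfront=True))
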
49 changes: 49 additions & 0 deletions tests/test_quantization/lifecycle/test_apply.py
@@ -15,6 +15,7 @@
 import re
 from typing import Optional

+import pytest
 import torch
 from compressed_tensors.config import CompressionFormat
 from compressed_tensors.quantization import (
@@ -223,3 +224,51 @@ def get_sample_tinyllama_quant_config(status: str = "frozen"):
         "ignore": ["LlamaRotaryEmbedding", "model.layers.1.mlp.down_proj"],
     }
     return QuantizationConfig.parse_obj(config_dict)
+
+
+@pytest.mark.parametrize(
+    "ignore,should_raise_warning",
+    [
+        [("lm_head", "re:.*gate"), False],
+        [("lm_head", "re:.*foobarbaz"), True],
+    ],
+)
+def test_apply_quantization_status(caplog, ignore, should_raise_warning):
+    import logging
+
+    from transformers import AutoModelForCausalLM
+
+    # load a dense, unquantized tiny llama model
+    device = "cuda:0"
+    model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name, device_map=device, torch_dtype="auto"
+    )
+
+    quantization_config_dict = {
+        "quant_method": "sparseml",
+        "format": "pack-quantized",
+        "global_compression_ratio": None,
+        "config_groups": {
+            "group_1": {
+                "weights": {
+                    "num_bits": 4,
+                    "type": "int",
+                    "symmetric": False,
+                    "strategy": "tensor",
+                },
+                "targets": ["Linear"],
+            }
+        },
+    }
+    quantization_config_dict["ignore"] = ignore
+
+    config = QuantizationConfig(**quantization_config_dict)
+    config.quantization_status = QuantizationStatus.CALIBRATION
+
+    if should_raise_warning:
+        # mismatch in the ignore key of quantization_config_dict
+        with caplog.at_level(logging.WARNING):
+            apply_quantization_config(model, config)
+    else:
+        apply_quantization_config(model, config)
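
The two parametrized cases above cover both sides of the new behavior: `re:.*gate` is expected to match TinyLlama's MLP gate projections, so no warning should be logged, while `re:.*foobarbaz` matches nothing and should produce the warning captured via caplog. A sketch of running just this test from the repository root, assuming pytest, torch, transformers, and a CUDA device are available (the test pins the model to cuda:0):

import pytest

# Select only the new test by keyword; -v prints each parametrized case.
pytest.main(
    [
        "tests/test_quantization/lifecycle/test_apply.py",
        "-k",
        "test_apply_quantization_status",
        "-v",
    ]
)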
