Don't have a sparsity_config if model is quantized #680

Closed · wants to merge 2 commits
@@ -1,7 +1,6 @@
 from typing import Optional
 
 from compressed_tensors import CompressionFormat
-from compressed_tensors.config import SparsityCompressionConfig
 from compressed_tensors.quantization import QuantizationStrategy, QuantizationType
 from compressed_tensors.quantization.utils import (
     is_model_quantized,
@@ -16,7 +15,7 @@ def infer_quantization_format(
     model,
     quantization_format: Optional[str] = None,
     save_compressed: bool = False,
-    sparsity_config: Optional[SparsityCompressionConfig] = None,
+    sparsity_structure: Optional[str] = None,
 ) -> str:
     """
     Infers a quantization format based on model state and compression args
@@ -36,9 +35,7 @@ def infer_quantization_format(
 
     if save_compressed:
         weight_args, input_args = _get_unique_quant_args(model)
-        is_24_structure = (
-            sparsity_config and sparsity_config.sparsity_structure == "2:4"
-        )
+        is_24_structure = sparsity_structure == "2:4"
         is_weight_only = len(input_args) == 0 and len(weight_args) > 0
 
         if is_weight_only:  # w4a16 and w8a16
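For callers, the visible change in this file is that `infer_quantization_format` now takes the sparsity structure as a plain string rather than a full `SparsityCompressionConfig`. A minimal usage sketch of the new signature; the import path and the `model` object are assumptions for illustration, only the keyword arguments come from this diff:

```python
# Sketch only: assumes `model` is a quantized torch.nn.Module produced by an
# llm-compressor run, and that the function lives next to sparsity_config.py
# (the exact module path is not shown in this diff).
from llmcompressor.transformers.compression.quantization_format import (
    infer_quantization_format,
)

compression_format = infer_quantization_format(
    model=model,
    save_compressed=True,
    sparsity_structure="2:4",  # plain string now, not a SparsityCompressionConfig
)
print(compression_format.value)  # the tests below compare against .value
```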
9 changes: 5 additions & 4 deletions src/llmcompressor/transformers/compression/sparsity_config.py
@@ -84,6 +84,9 @@ def from_pretrained(
         :param compress: whether or not to compress the model on disk
         :return: compression config inferred from the model
         """
+        if is_model_quantized(model):
+            # compressing a sparse quantized model is not supported yet
+            return None
 
         global_sparsity = SparsityConfigMetadata.infer_global_sparsity(
             model, state_dict=state_dict
@@ -95,10 +98,8 @@
         sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure(
             model=model
         )
-        if is_model_quantized(model):
-            # compressing a sparse quantized model is not supported yet
-            format = CompressionFormat.dense.value
-        elif compress:
+
+        if compress:
             format = CompressionFormat.sparse_bitmask.value
         else:
             format = CompressionFormat.dense.value
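The practical effect of the hunks above is that a quantized model no longer gets a dense-format sparsity config; `from_pretrained` now bails out early. A rough sketch of the new behavior, assuming `quantized_model` has already been through a quantization modifier; the `state_dict` keyword is shown only because the surrounding code passes one, it is unused on this early-return path:

```python
from llmcompressor.transformers.compression.sparsity_config import (
    SparsityConfigMetadata,
)

# Sketch only: `quantized_model` is assumed to be quantized, so the helper
# returns None instead of building a sparsity config for the checkpoint.
config = SparsityConfigMetadata.from_pretrained(
    quantized_model,
    state_dict=quantized_model.state_dict(),
    compress=True,
)
assert config is None  # previously: a config with format="dense"
```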
@@ -94,11 +94,16 @@ def save_pretrained_wrapper(
             model, state_dict=state_dict, compress=False
         )
 
+        sparsity_structure = (
+            sparsity_config.sparsity_structure
+            if sparsity_config is not None
+            else SparsityConfigMetadata.infer_sparsity_structure(model)
+        )
         quantization_format = infer_quantization_format(
             model=model,
             quantization_format=quantization_format,
             save_compressed=save_compressed,
-            sparsity_config=sparsity_config,
+            sparsity_structure=sparsity_structure,
         )
         compressor = ModelCompressor.from_pretrained_model(
             model,
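In the save path, the structure string is now resolved up front: an explicit `sparsity_config` still wins, but when it is absent (which is now always the case for quantized models) the structure is inferred from the model directly, so 2:4 sparsity can still steer the quantization format. A hedged end-to-end sketch, assuming `model` is a 2:4-pruned and then quantized model whose `save_pretrained` has already been wrapped by llm-compressor:

```python
# Assumption for illustration: llm-compressor's wrapped save_pretrained is in
# place on `model`. With no sparsity_config written for quantized models, the
# wrapper infers sparsity_structure="2:4" itself and forwards it to
# infer_quantization_format, so a 2:4-aware quantized format can still be chosen.
model.save_pretrained("./compressed-model", save_compressed=True)
```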
@@ -30,6 +30,8 @@ def test_infer_quant_format(preset, sparsity_structure, expected_format):
         module.quantization_scheme = quant_scheme
 
     inferred_format = infer_quantization_format(
-        dummy_model, save_compressed=True, sparsity_config=sparsity_config
+        dummy_model,
+        save_compressed=True,
+        sparsity_structure=sparsity_config.sparsity_structure,
     )
     assert inferred_format.value == expected_format