From a3872d906ac81d7e70ba94d387a88b94fc17b4bc Mon Sep 17 00:00:00 2001
From: Sara Adkins
Date: Wed, 24 Jul 2024 19:55:09 +0000
Subject: [PATCH] rename targets

---
 src/llmcompressor/modifiers/quantization/gptq/base.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py
index d9d7959db..2d62e69fc 100644
--- a/src/llmcompressor/modifiers/quantization/gptq/base.py
+++ b/src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -94,6 +94,7 @@ class GPTQModifier(Modifier):
 
     sequential_update: Optional[bool] = False
     targets: Union[str, List[str], None] = None
+    sequential_targets: Union[str, List[str], None] = None
     block_size: int = 128
     quantize: Union[bool, Dict] = True
     dampening_frac: Optional[float] = 0.01
@@ -177,11 +178,11 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
         modifiable_model = state.model
         calibration_dataloader = state.data.calib
 
-        if self.targets is None:
+        if self.sequential_targets is None:
             # if no targets are provided, default to the modules that shouldn't be
             # split by FSDP. For Transformers models this is equivalent to the
             # decoder layers (ie LlamaDecoderLayer)
-            self.targets = get_no_split_params(modifiable_model)
+            self.sequential_targets = get_no_split_params(modifiable_model)
 
         self.initialize_compression(modifiable_model, calibration_dataloader)
         self.apply_compression(calibration_dataloader)
@@ -215,7 +216,7 @@ def compressible_layers(self) -> Dict:
                 f"{type(self.model)} instead"
             )
 
-        return get_layers(self.targets, self.model)
+        return get_layers(self.sequential_targets, self.model)
 
     def initialize_compression(
         self,
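
Note (not part of the patch): a minimal usage sketch of the renamed field,
assuming GPTQModifier can be imported from the module touched above and
constructed by keyword like its other fields; the target list shown is
hypothetical.

    from llmcompressor.modifiers.quantization.gptq.base import GPTQModifier

    # After this rename, the modules to compress one at a time are passed as
    # `sequential_targets` (previously `targets`). Leaving it as None falls
    # back to get_no_split_params(), i.e. the model's no-split modules; for
    # Transformers models these are the decoder layers (e.g. LlamaDecoderLayer).
    modifier = GPTQModifier(
        sequential_targets=["LlamaDecoderLayer"],  # hypothetical target list
        block_size=128,
        dampening_frac=0.01,
    )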