fix sparse bugs (#275)
helloyongyang authored Dec 23, 2024
1 parent c99b2b7 commit 32b243a
Showing 39 changed files with 53 additions and 43 deletions.
2 changes: 1 addition & 1 deletion configs/quantization/backend/autoawq/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
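Every YAML hunk in this commit is the same one-line rename of the calibration preprocessor, general → txt_general_preproc. The resolver itself is not part of this diff, but preprocessors in configs like these are typically looked up by name at runtime, so the stale name would fail at lookup time once the function was renamed. A minimal illustrative sketch of that pattern, with a hypothetical registry (not llmc's actual code):

import yaml

# Hypothetical name-keyed registry; llmc's real resolver is outside this diff.
PREPROC_REGISTRY = {
    'txt_general_preproc': lambda samples, seq_len: [s[:seq_len] for s in samples],
}

cfg = yaml.safe_load('''
calib:
    preproc: txt_general_preproc
    seq_len: 512
''')

calib = cfg['calib']
preproc_fn = PREPROC_REGISTRY[calib['preproc']]  # KeyError if the config still says 'general'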
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/mlcllm/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/awq_w8a8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/fp8/awq_fp8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/fp8/awq_fp8_static.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/smoothquant_w8a8.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/awq_w8a8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/fp8/awq_fp8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/fp8/awq_fp8_static.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/fp8/smoothquant_fp8.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/smoothquant_w8a8.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, transformed, fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/methods/LlmInt8/llmint8_w_only.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 2048
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/methods/NormTweaking/ntweak_w_a.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/methods/OsPlus/osplus_w_a.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 1
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, transformed, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 2048
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 2048
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, transformed, fake_quant]
2 changes: 1 addition & 1 deletion configs/sparsification/methods/Magnitude/magnitude.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [transformed]
2 changes: 1 addition & 1 deletion configs/sparsification/methods/ShortGPT/shortgpt.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [transformed]
2 changes: 1 addition & 1 deletion configs/sparsification/methods/Wanda/wanda.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [transformed]
12 changes: 11 additions & 1 deletion llmc/__main__.py
@@ -23,6 +23,15 @@
 from llmc.utils.registry_factory import ALGO_REGISTRY, MODEL_REGISTRY


+def get_modality(config):
+    if 'quant' in config:
+        return config.quant.get('quant_objects', ['language'])
+    elif 'sparse' in config:
+        return config.sparse.get('sparse_objects', ['language'])
+    else:
+        return ['language']
+
+
 def main(config):
     model = MODEL_REGISTRY[config.model.type](config)

@@ -32,7 +41,8 @@ def main(config):
     eval_list = get_eval_list(model, config)
     eval_model(model, None, eval_list, eval_pos='pretrain')

-    for modality in config.quant.get('quant_objects', ['language']):
+    # for modality in config.quant.get('quant_objects', ['language']):
+    for modality in get_modality(config):
         model.set_modality(modality)
         if not config.get('calib', False):
             blockwise_opt = ALGO_REGISTRY[config.quant.method](
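The new get_modality helper is the heart of this fix: the old loop read config.quant unconditionally, which breaks for sparsification configs that only define a sparse section. A quick sketch of the dispatch, using a minimal attribute-style dict so config.quant.get(...) works as in the code above; the example configs are made up for illustration:

class AttrDict(dict):
    # Minimal stand-in for llmc's attribute-style config object.
    __getattr__ = dict.__getitem__


def get_modality(config):
    if 'quant' in config:
        return config.quant.get('quant_objects', ['language'])
    elif 'sparse' in config:
        return config.sparse.get('sparse_objects', ['language'])
    else:
        return ['language']


quant_cfg = AttrDict(quant=AttrDict(method='Awq', quant_objects=['language', 'vision']))
sparse_cfg = AttrDict(sparse=AttrDict(method='Wanda'))

print(get_modality(quant_cfg))   # ['language', 'vision']
print(get_modality(sparse_cfg))  # ['language'] -- the sparse path no longer touches config.quant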
@@ -12,8 +12,8 @@


 class BaseBlockwiseSparsification(BlockwiseOpt):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)
         self.set_sparsity_config()

     def block_init(self, block):
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/magnitude.py
@@ -8,8 +8,8 @@

 @ALGO_REGISTRY
 class Magnitude(BaseBlockwiseSparsification):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)

     @torch.no_grad()
     def subset_transform(
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/shortgpt.py
@@ -17,8 +17,8 @@

 @ALGO_REGISTRY
 class ShortGPT(BaseBlockwiseSparsification):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)

     def block_opt(self, block):
         block = block.cuda()
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/wanda.py
@@ -9,8 +9,8 @@

 @ALGO_REGISTRY
 class Wanda(BaseBlockwiseSparsification):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)

     @torch.no_grad()
     def get_row_scale(self, layer, act):
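Taken together with the __main__.py change, these four diffs settle the sparse calling convention: the driver picks the modality via get_modality(config) and installs it on the model with model.set_modality(modality) before the algorithm is built, so the sparsifier constructors no longer take a modality argument. A self-contained toy sketch of that convention (class and registry names mirror the diff; everything else is stubbed for illustration and is not llmc's actual driver):

ALGO_REGISTRY = {}


def register(cls):
    ALGO_REGISTRY[cls.__name__] = cls
    return cls


class BlockwiseOpt:
    # New-style signature from the diff: no modality parameter.
    def __init__(self, model, sparsity_config, input, padding_mask, config):
        self.model, self.sparsity_config = model, sparsity_config


@register
class Wanda(BlockwiseOpt):
    pass


class ToyModel:
    def set_modality(self, modality):
        self.modality = modality  # the driver sets this before building the algo


model, config = ToyModel(), {'sparse': {'method': 'Wanda'}}
for modality in ['language']:  # what get_modality(config) returns for this config
    model.set_modality(modality)
    algo = ALGO_REGISTRY[config['sparse']['method']](model, config['sparse'], None, None, config)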
