fix sparse bugs (#275)
helloyongyang authored Dec 23, 2024
1 parent c99b2b7 commit 32b243a
Showing 39 changed files with 53 additions and 43 deletions.
2 changes: 1 addition & 1 deletion configs/quantization/backend/autoawq/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
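Every YAML hunk in this commit is the same one-line rename of the calibration preprocessor, general → txt_general_preproc. The resolver itself is not part of this diff, but preprocessors in configs like these are typically looked up by name at runtime, so the stale name would fail at lookup time once the function was renamed. A minimal illustrative sketch of that pattern, with a hypothetical registry (not llmc's actual code):

import yaml

# Hypothetical name-keyed registry; llmc's real resolver is outside this diff.
PREPROC_REGISTRY = {
    'txt_general_preproc': lambda samples, seq_len: [s[:seq_len] for s in samples],
}

cfg = yaml.safe_load('''
calib:
    preproc: txt_general_preproc
    seq_len: 512
''')

calib = cfg['calib']
preproc_fn = PREPROC_REGISTRY[calib['preproc']]  # KeyError if the config still says 'general'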
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/mlcllm/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/awq_w8a8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/fp8/awq_fp8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/fp8/awq_fp8_static.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/sglang/smoothquant_w8a8.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/awq_w4a16.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/awq_w8a8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/fp8/awq_fp8.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/fp8/awq_fp8_static.yml
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/fp8/smoothquant_fp8.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/backend/vllm/smoothquant_w8a8.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -12,7 +12,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, transformed, fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/methods/LlmInt8/llmint8_w_only.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 2048
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/methods/NormTweaking/ntweak_w_a.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
2 changes: 1 addition & 1 deletion configs/quantization/methods/OsPlus/osplus_w_a.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 1
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, transformed, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 2048
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: 1
     seq_len: 2048
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, fake_quant]
@@ -11,7 +11,7 @@ calib:
     n_samples: 512
     bs: 1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [pretrain, transformed, fake_quant]
2 changes: 1 addition & 1 deletion configs/sparsification/methods/Magnitude/magnitude.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [transformed]
2 changes: 1 addition & 1 deletion configs/sparsification/methods/ShortGPT/shortgpt.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [transformed]
2 changes: 1 addition & 1 deletion configs/sparsification/methods/Wanda/wanda.yml
@@ -11,7 +11,7 @@ calib:
     n_samples: 128
     bs: -1
     seq_len: 512
-    preproc: general
+    preproc: txt_general_preproc
     seed: *seed
 eval:
     eval_pos: [transformed]
12 changes: 11 additions & 1 deletion llmc/__main__.py
@@ -23,6 +23,15 @@
 from llmc.utils.registry_factory import ALGO_REGISTRY, MODEL_REGISTRY


+def get_modality(config):
+    if 'quant' in config:
+        return config.quant.get('quant_objects', ['language'])
+    elif 'sparse' in config:
+        return config.sparse.get('sparse_objects', ['language'])
+    else:
+        return ['language']
+
+
 def main(config):
     model = MODEL_REGISTRY[config.model.type](config)

@@ -32,7 +41,8 @@ def main(config):
     eval_list = get_eval_list(model, config)
     eval_model(model, None, eval_list, eval_pos='pretrain')

-    for modality in config.quant.get('quant_objects', ['language']):
+    # for modality in config.quant.get('quant_objects', ['language']):
+    for modality in get_modality(config):
         model.set_modality(modality)
         if not config.get('calib', False):
             blockwise_opt = ALGO_REGISTRY[config.quant.method](
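The new get_modality helper is the heart of this fix: the old loop read config.quant unconditionally, which breaks for sparsification configs that only define a sparse section. A quick sketch of the dispatch, using a minimal attribute-style dict so config.quant.get(...) works as in the code above; the example configs are made up for illustration:

class AttrDict(dict):
    # Minimal stand-in for llmc's attribute-style config object.
    __getattr__ = dict.__getitem__


def get_modality(config):
    if 'quant' in config:
        return config.quant.get('quant_objects', ['language'])
    elif 'sparse' in config:
        return config.sparse.get('sparse_objects', ['language'])
    else:
        return ['language']


quant_cfg = AttrDict(quant=AttrDict(method='Awq', quant_objects=['language', 'vision']))
sparse_cfg = AttrDict(sparse=AttrDict(method='Wanda'))

print(get_modality(quant_cfg))   # ['language', 'vision']
print(get_modality(sparse_cfg))  # ['language'] -- the sparse path no longer touches config.quant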
@@ -12,8 +12,8 @@


 class BaseBlockwiseSparsification(BlockwiseOpt):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)
         self.set_sparsity_config()

     def block_init(self, block):
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/magnitude.py
@@ -8,8 +8,8 @@

 @ALGO_REGISTRY
 class Magnitude(BaseBlockwiseSparsification):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)

     @torch.no_grad()
     def subset_transform(
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/shortgpt.py
@@ -17,8 +17,8 @@

 @ALGO_REGISTRY
 class ShortGPT(BaseBlockwiseSparsification):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)

     def block_opt(self, block):
         block = block.cuda()
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/wanda.py
@@ -9,8 +9,8 @@

 @ALGO_REGISTRY
 class Wanda(BaseBlockwiseSparsification):
-    def __init__(self, model, sparsity_config, input, padding_mask, config, modality='language'):
-        super().__init__(model, sparsity_config, input, padding_mask, config, modality)
+    def __init__(self, model, sparsity_config, input, padding_mask, config):
+        super().__init__(model, sparsity_config, input, padding_mask, config)

     @torch.no_grad()
     def get_row_scale(self, layer, act):
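Taken together with the __main__.py change, these four diffs settle the sparse calling convention: the driver picks the modality via get_modality(config) and installs it on the model with model.set_modality(modality) before the algorithm is built, so the sparsifier constructors no longer take a modality argument. A self-contained toy sketch of that convention (class and registry names mirror the diff; everything else is stubbed for illustration and is not llmc's actual driver):

ALGO_REGISTRY = {}


def register(cls):
    ALGO_REGISTRY[cls.__name__] = cls
    return cls


class BlockwiseOpt:
    # New-style signature from the diff: no modality parameter.
    def __init__(self, model, sparsity_config, input, padding_mask, config):
        self.model, self.sparsity_config = model, sparsity_config


@register
class Wanda(BlockwiseOpt):
    pass


class ToyModel:
    def set_modality(self, modality):
        self.modality = modality  # the driver sets this before building the algo


model, config = ToyModel(), {'sparse': {'method': 'Wanda'}}
for modality in ['language']:  # what get_modality(config) returns for this config
    model.set_modality(modality)
    algo = ALGO_REGISTRY[config['sparse']['method']](model, config['sparse'], None, None, config)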
