[Bug]: Using llmcompressor to quantize the Llama 3 70B model to int8 (W8A8) fails with ValueError: Failed to invert hessian due to numerical instability #966
Labels: bug
When I use llmcompressor to quantize the Llama 3 70B model to int8 (W8A8), it fails with ValueError: Failed to invert hessian due to numerical instability. Consider increasing GPTQModifier.dampening_frac, increasing the number of calibration samples, or shuffling the calibration dataset. What should I do?
Hardware/model: NVIDIA H20 / Llama 3.1 70B
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
step 1: install llmcompressor and load the model
pip install llmcompressor==0.3.0
from llmcompressor.transformers import SparseAutoModelForCausalLM
from transformers import AutoTokenizer
MODEL_ID = "/llama3_1_70B"
model = SparseAutoModelForCausalLM.from_pretrained(
    MODEL_ID, device_map="auto", torch_dtype="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
step 2: prepare the calibration dataset
from datasets import load_dataset
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048
# Load and preprocess the dataset
ds = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft")
ds = ds.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))
def preprocess(example):
    return {"text": tokenizer.apply_chat_template(example["messages"], tokenize=False)}
ds = ds.map(preprocess)
def tokenize(sample):
    return tokenizer(sample["text"], padding=False, max_length=MAX_SEQUENCE_LENGTH, truncation=True, add_special_tokens=False)
ds = ds.map(tokenize, remove_columns=ds.column_names)
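One sanity check worth running at this point (a hypothetical addition, not part of the original script): the error raised later recommends more calibration data, and very short or empty samples are a common way for the GPTQ Hessian to become rank-deficient, so it is cheap to confirm the tokenized samples are non-trivial.

# Hypothetical sanity check: inspect token counts of the calibration samples.
lengths = [len(sample["input_ids"]) for sample in ds]
print(f"samples: {len(lengths)}, min/mean/max tokens: "
      f"{min(lengths)}/{sum(lengths) / len(lengths):.0f}/{max(lengths)}")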
step 3: configure and apply quantization
from llmcompressor.transformers import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
# Configure the quantization algorithms
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(targets="Linear", scheme="W8A8", ignore=["lm_head"]),
]
# Apply quantization
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)
# Save the compressed model
SAVE_DIR = MODEL_ID.split("/")[1] + "-W8A8-Dynamic-Per-Token"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
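The error in step 4 below ends with a concrete suggestion: increase GPTQModifier.dampening_frac. A minimal variant of the recipe above with stronger damping (the parameter name is taken from the error message itself; the default is small, around 0.01, and 0.1 is a common first retry):

recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(
        targets="Linear",
        scheme="W8A8",
        ignore=["lm_head"],
        dampening_frac=0.1,  # stronger diagonal damping than the default
    ),
]

If the Cholesky failure persists, this can be combined with the message's other suggestions: raise NUM_CALIBRATION_SAMPLES or reshuffle the dataset with a different seed.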
step 4: error
2024-12-10T04:11:10.323992-0800 | compress | METRIC - Compressed layer size: 448.08203125 MB
2024-12-10T04:11:10.324222-0800 | compress_module | INFO - Compressing model.layers.3.model.layers.3.mlp.down_proj...
Traceback (most recent call last):
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py", line 179, in compress
self.H = torch.linalg.cholesky(self.H, upper=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch._C._LinAlgError: linalg.cholesky: The factorization could not be completed because the input is not positive-definite (the leading minor of order 22909 is not positive-definite).
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/1.py", line 53, in
oneshot(
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/transformers/finetune/text_generation.py", line 76, in oneshot
main(model_args, data_args, training_args)
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/transformers/finetune/text_generation.py", line 363, in main
stage_runner.one_shot()
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/transformers/finetune/runner.py", line 171, in one_shot
self.trainer.one_shot(calibration_data=calib_data, stage=stage)
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/transformers/finetune/session_mixin.py", line 439, in one_shot
apply(
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/core/session_functions.py", line 184, in apply
return active_session().apply(
^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/core/session.py", line 210, in apply
self.initialize(**kwargs)
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/core/session.py", line 156, in initialize
mod_data = self._lifecycle.initialize(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/core/lifecycle.py", line 126, in initialize
data = mod.initialize(state=self.state, **extras)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/stage.py", line 124, in initialize
modifier.initialize(state, **kwargs)
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/modifier.py", line 119, in initialize
initialized = self.on_initialize(state=state, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/quantization/gptq/base.py", line 203, in on_initialize
self.apply_compression(calibration_dataloader)
File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/quantization/gptq/base.py", line 303, in apply_compression
layer_compressor.compress()
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/utils/layer_compressor.py", line 177, in compress
self.layer.apply(compress_module)
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1029, in apply
module.apply(fn)
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1029, in apply
module.apply(fn)
File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1030, in apply
fn(self)
File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/utils/layer_compressor.py", line 174, in compress_module
module.compress(**self.args)
File "/usr/local/lib/python3.12/dist-packages/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py", line 182, in compress
raise ValueError(
ValueError: Failed to invert hessian due to numerical instability. Consider increasing GPTQModifier.dampening_frac, increasing the number of calibration samples, or shuffling the calibration dataset
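For context on why dampening_frac helps: GPTQ accumulates a Hessian H = X X^T from each layer's calibration activations and must Cholesky-factorize it, which requires H to be positive definite. The standard GPTQ remedy is to add a fraction of the mean diagonal back onto the diagonal before factorizing. A schematic sketch of that trick (illustrative only, not llmcompressor's exact implementation):

import torch

def damped_cholesky(H: torch.Tensor, dampening_frac: float = 0.01) -> torch.Tensor:
    # Add dampening_frac * mean(diag(H)) to the diagonal in place. H = X @ X.T
    # is at least positive semi-definite, so any positive damping makes it
    # positive definite; a larger dampening_frac trades a little quantization
    # accuracy for a better-conditioned factorization.
    damp = dampening_frac * torch.mean(torch.diag(H))
    idx = torch.arange(H.shape[0], device=H.device)
    H[idx, idx] += damp
    return torch.linalg.cholesky(H, upper=True)

Raising dampening_frac simply makes that diagonal addition larger, which is why it is the first suggestion in the error message.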