From c99638cb0dec5e70f2253a72a78b8e1b17360a98 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <165115237+ZX-ModelCloud@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:30:51 +0800
Subject: [PATCH] Expose hf_quantize() (#888)

* check device_map={"":"auto"}

* torch.mps may not have is_available() in some versions of torch

* cleanup

* add hf_quantize()
---
 gptqmodel/quantization/gptq.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
index 58ce7b033..c04b445a2 100644
--- a/gptqmodel/quantization/gptq.py
+++ b/gptqmodel/quantization/gptq.py
@@ -70,6 +70,17 @@ def add_batch(self, inp, out):
         # self.H += 2 / self.nsamples * inp.matmul(inp.t())
         self.H += inp.matmul(inp.t())
 
+    def hf_quantize(
+        self,
+        blocksize=128,
+        percdamp=0.01,
+        damp_auto_increment=0.0015,
+        group_size=-1,
+        actorder=False,
+        static_groups=False,
+    ):
+        return self.quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)
+
     @torch.inference_mode()
     def quantize(
         self,
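
For context, a minimal usage sketch of the new entry point. This is not part of the patch: the GPTQ(layer) constructor form and the calibration shapes below are assumptions about the surrounding gptq.py API; only add_batch() and hf_quantize() are visible in this diff.

import torch

from gptqmodel.quantization.gptq import GPTQ

# Wrap a single Linear layer for quantization (constructor form is an
# assumption; only add_batch() and hf_quantize() appear in this diff).
layer = torch.nn.Linear(4096, 4096)
gptq = GPTQ(layer)

# Feed calibration activations so add_batch() can accumulate the
# Hessian proxy H += X X^T used by the GPTQ solver.
x = torch.randn(8, 4096)
gptq.add_batch(x, layer(x))

# hf_quantize() forwards its arguments to quantize() with identical
# defaults, giving external integrations (e.g. Hugging Face) a named,
# stable public entry point.
result = gptq.hf_quantize(
    blocksize=128,
    percdamp=0.01,
    group_size=128,   # the new signature defaults to -1 (no grouping)
    actorder=True,
)

The wrapper is a thin pass-through, so external callers depend only on the hf_quantize() name while quantize() itself can remain an internal detail of the library.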