From c99638cb0dec5e70f2253a72a78b8e1b17360a98 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <165115237+ZX-ModelCloud@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:30:51 +0800
Subject: [PATCH] Expose hf_quantize() (#888)

* check device_map={"":"auto"}

* torch.mps may not have is_available() in some versions of torch

* cleanup

* add hf_quantize()
---
 gptqmodel/quantization/gptq.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
index 58ce7b033..c04b445a2 100644
--- a/gptqmodel/quantization/gptq.py
+++ b/gptqmodel/quantization/gptq.py
@@ -70,6 +70,17 @@ def add_batch(self, inp, out):
         # self.H += 2 / self.nsamples * inp.matmul(inp.t())
         self.H += inp.matmul(inp.t())
 
+    def hf_quantize(
+        self,
+        blocksize=128,
+        percdamp=0.01,
+        damp_auto_increment=0.0015,
+        group_size=-1,
+        actorder=False,
+        static_groups=False,
+    ):
+        return self.quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)
+
     @torch.inference_mode()
     def quantize(
         self,
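
For context, a minimal usage sketch of the new entry point. This is not part of the patch: the GPTQ(layer) constructor form and the calibration shapes below are assumptions about the surrounding gptq.py API; only add_batch() and hf_quantize() are visible in this diff.

import torch

from gptqmodel.quantization.gptq import GPTQ

# Wrap a single Linear layer for quantization (constructor form is an
# assumption; only add_batch() and hf_quantize() appear in this diff).
layer = torch.nn.Linear(4096, 4096)
gptq = GPTQ(layer)

# Feed calibration activations so add_batch() can accumulate the
# Hessian proxy H += X X^T used by the GPTQ solver.
x = torch.randn(8, 4096)
gptq.add_batch(x, layer(x))

# hf_quantize() forwards its arguments to quantize() with identical
# defaults, giving external integrations (e.g. Hugging Face) a named,
# stable public entry point.
result = gptq.hf_quantize(
    blocksize=128,
    percdamp=0.01,
    group_size=128,   # the new signature defaults to -1 (no grouping)
    actorder=True,
)

The wrapper is a thin pass-through, so external callers depend only on the hf_quantize() name while quantize() itself can remain an internal detail of the library.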