Expose hf_quantize() (#888)

* check device_map={"":"auto"}
* torch.mps may not have is_available() in some versions of torch
* cleanup
* add hf_quantize()
ModelCloud · Dec 17, 2024 · c99638c · c99638c
1 parent 0e4893b
commit c99638c
Showing 1 changed file with 11 additions and 0 deletions.
diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
@@ -70,6 +70,17 @@ def add_batch(self, inp, out):
         # self.H += 2 / self.nsamples * inp.matmul(inp.t())
         self.H += inp.matmul(inp.t())
 
+    def hf_quantize(
+            self,
+            blocksize=128,
+            percdamp=0.01,
+            damp_auto_increment=0.0015,
+            group_size=-1,
+            actorder=False,
+            static_groups=False,
+    ):
+        return self.quantize(blocksize, percdamp, damp_auto_increment, group_size, actorder, static_groups)
+
     @torch.inference_mode()
     def quantize(
         self,