auto select best device (#822)
* auto select best device

* ruff

* fix wrong check

* update readme

* fix circular import
CSY-ModelCloud authored Dec 12, 2024
1 parent 5452e0b commit dffa089
Showing 2 changed files with 6 additions and 2 deletions.
README.md (4 changes: 2 additions & 2 deletions)

@@ -130,7 +130,7 @@ Below is a basic sample using `GPTQModel` to quantize a llm model and perform po
 ```py
 from datasets import load_dataset
 from transformers import AutoTokenizer
-from gptqmodel import GPTQModel, QuantizeConfig, get_best_device
+from gptqmodel import GPTQModel, QuantizeConfig

 model_id = "meta-llama/Llama-3.2-1B-Instruct"
 quant_path = "Llama-3.2-1B-Instruct-gptqmodel-4bit"
@@ -154,7 +154,7 @@ model.quantize(calibration_dataset)

 model.save(quant_path)

-model = GPTQModel.load(quant_path, device=get_best_device())
+model = GPTQModel.load(quant_path)

 result = model.generate(
   **tokenizer(
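With this change, the README example lets the library pick the device instead of calling `get_best_device()` by hand. Because `auto.py` only fills in `device` when neither `device` nor `device_map` is given (see the diff below), an explicit device should still override the automatic choice. A minimal sketch of both call styles, where `"cuda:0"` is an assumed example value rather than something taken from this commit:

```py
from gptqmodel import GPTQModel

quant_path = "Llama-3.2-1B-Instruct-gptqmodel-4bit"

# New default: no device argument, the library auto-selects the best one.
model = GPTQModel.load(quant_path)

# Explicit override should still work; "cuda:0" is an assumed example value.
model = GPTQModel.load(quant_path, device="cuda:0")
```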
gptqmodel/models/auto.py (4 changes: 4 additions & 0 deletions)

@@ -9,6 +9,7 @@
 from huggingface_hub import list_repo_files
 from transformers import AutoConfig

+from ._const import get_best_device
 from ..utils import BACKEND, EVAL
 from ..utils.logger import setup_logger
 from ..utils.model import check_and_get_model_type
@@ -147,6 +148,9 @@ def load(
                 is_quantized = True
                 break

+        if not device and not device_map:
+            device = get_best_device()
+
         if is_quantized:
             return cls.from_quantized(
                 model_id_or_path=model_id_or_path,
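For context, the new default comes from `get_best_device()`, imported from `._const`; its implementation is not part of this diff. The sketch below is only an assumption about what such a helper commonly does, probing PyTorch backends in priority order:

```py
# Hypothetical sketch; the real gptqmodel.models._const.get_best_device()
# is not shown in this commit and may differ.
import torch

def get_best_device() -> torch.device:
    # Prefer a CUDA (or ROCm) GPU when one is visible to PyTorch.
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    # Fall back to Apple Silicon's MPS backend when available.
    if torch.backends.mps.is_available():
        return torch.device("mps")
    # Intel XPU builds expose torch.xpu; guard with hasattr for older PyTorch.
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.device("xpu:0")
    # Otherwise run on CPU.
    return torch.device("cpu")
```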
