Fix incorrect device setting in autoround format inference (#383)
Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
WeiweiZhang1 authored Dec 12, 2024
1 parent 8d8c70d commit e88882e
Showing 1 changed file with 10 additions and 1 deletion.

auto_round/auto_quantizer.py
@@ -363,7 +363,14 @@ def detect_device(self, target_backend, orig_backend):
         if backend is None:
             raise ValueError("Backend not found, please set it to 'auto' to have a try ")
 
-        return BackendInfos[backend].device[0]
+        device = BackendInfos[backend].device[0]
+        if "cuda" in device and torch.cuda.is_available():
+            return device
+        elif "hpu" in device and is_hpu_supported():
+            return device
+        else:
+            return "cpu"
 
 
     def convert_model(self, model: nn.Module):
         """Converts the given model to an AutoRound model by replacing its layers with quantized layers.
@@ -392,6 +399,7 @@ def convert_model(self, model: nn.Module):
         quantization_config.target_backend = quantization_config.backend
 
         target_device = self.detect_device(quantization_config.target_backend, quantization_config.backend)
+
         self.target_device = target_device
 
         if hasattr(quantization_config, "backend"):  # pragma: no cover
@@ -744,3 +752,4 @@ def is_serializable(self):
 
 transformers.quantizers.auto.AutoHfQuantizer = AutoHfQuantizer
 transformers.modeling_utils.AutoHfQuantizer = AutoHfQuantizer
+
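The substance of the fix: detect_device previously returned the backend's preferred device unconditionally, so on a machine without that accelerator (e.g. no CUDA GPU) inference would later fail on an unusable device. It now checks that the device is actually available and falls back to "cpu" otherwise. Below is a minimal standalone sketch of that fallback logic; the BACKEND_DEVICES mapping, the backend names, and the is_hpu_supported stub are illustrative stand-ins, not auto_round's real structures.

import torch

# Illustrative stand-in for BackendInfos[backend].device[0] (hypothetical names).
BACKEND_DEVICES = {"triton": "cuda", "hpu": "hpu", "ipex": "cpu"}

def is_hpu_supported() -> bool:
    # Stub: the real auto_round helper checks whether the Habana
    # PyTorch bridge is importable, along the lines of this sketch.
    try:
        import habana_frameworks.torch.core  # noqa: F401
        return True
    except ImportError:
        return False

def resolve_device(backend: str) -> str:
    # Mirrors the patched fallback: only return an accelerator device
    # when the hardware is actually usable; otherwise use CPU.
    device = BACKEND_DEVICES[backend]
    if "cuda" in device and torch.cuda.is_available():
        return device
    elif "hpu" in device and is_hpu_supported():
        return device
    return "cpu"

# On a CUDA-less machine this prints "cpu" instead of handing back an
# unusable "cuda" device that would crash at model-load time.
print(resolve_device("triton"))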