intel · wenhuach21 · Dec 12, 2024 · Dec 12, 2024
diff --git a/auto_round/auto_quantizer.py b/auto_round/auto_quantizer.py
@@ -363,7 +363,14 @@ def detect_device(self, target_backend, orig_backend):
         if backend is None:
             raise ValueError("Backend not found, please set it to 'auto' to have a try ")
 
-        return BackendInfos[backend].device[0]
+        device = BackendInfos[backend].device[0]
+        if "cuda" in device and torch.cuda.is_available():
+            return device
+        elif "hpu" in device and is_hpu_supported():
+            return device
+        else:
+            return "cpu"
+
 
     def convert_model(self, model: nn.Module):
         """Converts the given model to an AutoRound model by replacing its layers with quantized layers.
@@ -392,6 +399,7 @@ def convert_model(self, model: nn.Module):
             quantization_config.target_backend = quantization_config.backend
 
         target_device = self.detect_device(quantization_config.target_backend, quantization_config.backend)
+
         self.target_device = target_device
 
         if hasattr(quantization_config, "backend"):  # pragma: no cover
@@ -744,3 +752,4 @@ def is_serializable(self):
 transformers.quantizers.auto.AutoHfQuantizer = AutoHfQuantizer
 transformers.modeling_utils.AutoHfQuantizer = AutoHfQuantizer
 
+