Fix incorrect device setting in autoround format inference (#383)
Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
WeiweiZhang1 authored Dec 12, 2024
1 parent 8d8c70d commit e88882e
Showing 1 changed file with 10 additions and 1 deletion.

auto_round/auto_quantizer.py
@@ -363,7 +363,14 @@ def detect_device(self, target_backend, orig_backend):
         if backend is None:
             raise ValueError("Backend not found, please set it to 'auto' to have a try ")
 
-        return BackendInfos[backend].device[0]
+        device = BackendInfos[backend].device[0]
+        if "cuda" in device and torch.cuda.is_available():
+            return device
+        elif "hpu" in device and is_hpu_supported():
+            return device
+        else:
+            return "cpu"
 
 
     def convert_model(self, model: nn.Module):
         """Converts the given model to an AutoRound model by replacing its layers with quantized layers.
@@ -392,6 +399,7 @@ def convert_model(self, model: nn.Module):
         quantization_config.target_backend = quantization_config.backend
 
         target_device = self.detect_device(quantization_config.target_backend, quantization_config.backend)
+
         self.target_device = target_device
 
         if hasattr(quantization_config, "backend"):  # pragma: no cover
@@ -744,3 +752,4 @@ def is_serializable(self):
 
 transformers.quantizers.auto.AutoHfQuantizer = AutoHfQuantizer
 transformers.modeling_utils.AutoHfQuantizer = AutoHfQuantizer
+
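The substance of the fix: detect_device previously returned the backend's preferred device unconditionally, so on a machine without that accelerator (e.g. no CUDA GPU) inference would later fail on an unusable device. It now checks that the device is actually available and falls back to "cpu" otherwise. Below is a minimal standalone sketch of that fallback logic; the BACKEND_DEVICES mapping, the backend names, and the is_hpu_supported stub are illustrative stand-ins, not auto_round's real structures.

import torch

# Illustrative stand-in for BackendInfos[backend].device[0] (hypothetical names).
BACKEND_DEVICES = {"triton": "cuda", "hpu": "hpu", "ipex": "cpu"}

def is_hpu_supported() -> bool:
    # Stub: the real auto_round helper checks whether the Habana
    # PyTorch bridge is importable, along the lines of this sketch.
    try:
        import habana_frameworks.torch.core  # noqa: F401
        return True
    except ImportError:
        return False

def resolve_device(backend: str) -> str:
    # Mirrors the patched fallback: only return an accelerator device
    # when the hardware is actually usable; otherwise use CPU.
    device = BACKEND_DEVICES[backend]
    if "cuda" in device and torch.cuda.is_available():
        return device
    elif "hpu" in device and is_hpu_supported():
        return device
    return "cpu"

# On a CUDA-less machine this prints "cpu" instead of handing back an
# unusable "cuda" device that would crash at model-load time.
print(resolve_device("triton"))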