set device as the same as origin model #1031

Merged 2 commits on Nov 26, 2024

Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/test_ipex.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        transformers-version: ["4.46.*"]
+        transformers-version: ["4.46.0", "4.46.3"]
         torch-version: ["2.4.0", "2.5.0"]
 
     runs-on: ubuntu-22.04
35 changes: 10 additions & 25 deletions optimum/intel/ipex/modeling_base.py
@@ -38,7 +38,6 @@
     GenerationConfig,
     GenerationMixin,
     PretrainedConfig,
-    is_torch_xpu_available,
 )
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.generation.candidate_generator import _crop_past_key_values
@@ -127,18 +126,9 @@ def __init__(
         warmup: bool = True,
         **kwargs,
     ):
-        if is_torch_xpu_available(check_device=True):
-            self._device = torch.device("xpu:0")
-        elif torch.cuda.is_available():
-            self._device = torch.device("cuda:0")
-        else:
-            self._device = torch.device("cpu")
 
         config = config or model.config
 
         OptimizedModel.__init__(self, model=model, config=config)
 
-        self.model.to(self._device)
         self._dtype = self.model.dtype if self.model.dtype is not None else torch.float32
         self.use_cache = kwargs.get("use_cache", False)
         self.model_save_dir = model_save_dir
@@ -174,7 +164,6 @@ def _from_pretrained(
         local_files_only: bool = False,
         torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
         trust_remote_code: bool = False,
-        file_name: Optional[str] = WEIGHTS_NAME,
         **kwargs,
     ):
         """
@@ -207,9 +196,6 @@
                 float16 or bfloat16 or float32: load in a specified dtype, ignoring the model config.torch_dtype if one exists. If not specified, the model will get loaded in float32.
             trust_remote_code (`bool`, *optional*)
                 Allows to use custom code for the modeling hosted in the model repository. This option should only be set for repositories you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the model repository.
-            file_name (`str`, *optional*):
-                The file name of the model to load. Overwrites the default file name and allows one to load the model
-                with a different name.
         """
         if use_auth_token is not None:
             warnings.warn(
@@ -287,7 +273,7 @@ def eval(self):
 
     @property
     def device(self) -> torch.device:
-        return self._device
+        return self.model.device
 
     @property
     def dtype(self) -> torch.dtype:
@@ -305,8 +291,7 @@ def add_patch(self) -> bool:
         return self._add_patch
 
     def to(self, device: Union[torch.device, str]):
-        self._device = device if isinstance(device, torch.device) else torch.device(device)
-        self.model.to(self._device)
+        self.model.to(self.device)
         return self
 
     def can_generate(self):
@@ -323,8 +308,8 @@ def _init_warmup(self):
         if not self._add_patch:
             # use_cache = "past_key_values" in self.input_names
             dummy_inputs = _prepare_inputs_for_ipex_model(self, self.export_feature, self.use_cache)
-            if self._device.type != "cpu":
-                dummy_inputs = recursive_to_device(value=dummy_inputs, device=self._device)
+            if self.device.type != "cpu":
+                dummy_inputs = recursive_to_device(value=dummy_inputs, device=self.device)
             for _ in range(2):
                 self(**dummy_inputs)
 
@@ -526,15 +511,15 @@ def generate(self, *args, **kwargs):
             raise ValueError(
                 f"Assisted decoding is not supported for patched models if ipex < 2.4, support methods are {_IPEX_EXPORTED_GENERATION_METHODS}"
             )
-        # Patch functions to support paged cache
+        # Patch functions to support ipex_paged cache
         if self._add_patch:
-            transformers.generation.utils.NEED_SETUP_CACHE_CLASSES_MAPPING["paged"] = IPEXPagedCache
-            self.generation_config.cache_implementation = "paged"
+            transformers.generation.utils.NEED_SETUP_CACHE_CLASSES_MAPPING["ipex_paged"] = IPEXPagedCache
+            self.generation_config.cache_implementation = "ipex_paged"
             if is_transformers_version(">=", "4.45.0"):
-                if "paged" not in transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS:
-                    transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS.append("paged")
+                if "ipex_paged" not in transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS:
+                    transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS.append("ipex_paged")
             if new_kwargs.get("generation_config", None):
-                new_kwargs["generation_config"].cache_implementation = "paged"
+                new_kwargs["generation_config"].cache_implementation = "ipex_paged"
 
         if self._add_patch and new_kwargs.get("assistant_model", None):
             transformers.generation.utils._crop_past_key_values = _ipex_crop_past_key_values
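For context, a minimal sketch of the behavior this file's changes introduce, assuming an environment with this patch applied; the model id "gpt2" and the exact from_pretrained arguments are illustrative only and are not taken from the PR:

import torch
from optimum.intel import IPEXModelForCausalLM

# The wrapper no longer probes for XPU/CUDA itself at construction time.
model = IPEXModelForCausalLM.from_pretrained("gpt2")

# `device` is now forwarded from the wrapped transformers model, so it reports
# wherever the origin model already lives.
print(model.device)  # e.g. device(type='cpu') when the origin model is on CPU

# Moving the underlying model is reflected by the wrapper as well
# (assumption: an XPU or CUDA device is actually available on the machine):
# model.model.to("xpu")
# print(model.device)  # device(type='xpu', index=0)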
2 changes: 1 addition & 1 deletion setup.py
@@ -64,7 +64,7 @@
     "nncf": ["nncf>=2.11.0"],
     "openvino": ["nncf>=2.11.0", "openvino==2024.5.0", "openvino-tokenizers==2024.5.0"],
     "neural-compressor": ["neural-compressor[pt]>3.0", "accelerate", "transformers<4.46"],
-    "ipex": ["intel-extension-for-pytorch", "transformers>=4.39,<4.47"],
+    "ipex": ["intel-extension-for-pytorch>=2.4", "transformers>4.45,<4.47"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,
     "tests": TESTS_REQUIRE,
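The "ipex" extra is tightened here to require intel-extension-for-pytorch 2.4 or newer and a transformers release strictly between 4.45 and 4.47. A small sketch (not part of the PR) of how an installed environment could be checked against those bounds, assuming the packaging library is available:

from importlib.metadata import version
from packaging.version import Version

# Illustrative only: verify the installed stack satisfies the new "ipex" extra.
ipex_ok = Version(version("intel-extension-for-pytorch")) >= Version("2.4")
transformers_ok = Version("4.45") < Version(version("transformers")) < Version("4.47")
print(f"ipex>=2.4: {ipex_ok}, 4.45<transformers<4.47: {transformers_ok}")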
3 changes: 1 addition & 2 deletions tests/neural_compressor/test_ipex.py
@@ -17,7 +17,6 @@
 
 import os
 import tempfile
-import unittest
 
 from neural_compressor.config import PostTrainingQuantConfig
 
@@ -53,7 +52,7 @@ class IPEXQuantizationTest(INCTestMixin):
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("text-classification", "bert", 21),)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
-    def test_ipex_static_quantization_with_smoothquant(self, task, model_arch, expected_quantized_matmuls):
+    def test_static_quantization_with_smoothquant(self, task, model_arch, expected_quantized_matmuls):
         recipes = {"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}
         num_samples = 10
         model_name = MODEL_NAMES[model_arch]
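The renamed test still covers IPEX static quantization with SmoothQuant via neural-compressor. As a rough sketch of the configuration it exercises, assuming a recent neural-compressor release (the exact PostTrainingQuantConfig keywords may differ between versions):

from neural_compressor.config import PostTrainingQuantConfig

# Illustrative reconstruction of the quantization setup used by the test:
# static post-training quantization on the IPEX backend with SmoothQuant enabled.
recipes = {"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}
quantization_config = PostTrainingQuantConfig(approach="static", backend="ipex", recipes=recipes)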