From f17cf1d53dedf7fbf849605bcf00775753e961f2 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 9 Oct 2024 12:42:33 +0200 Subject: [PATCH 1/7] remove the need for the config to be in the subfolder --- optimum/modeling_base.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 29521b7c0c..2147660bf9 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -28,6 +28,7 @@ from .exporters import TasksManager from .utils import CONFIG_NAME +from .utils.file_utils import find_files_matching_pattern if TYPE_CHECKING: @@ -380,27 +381,24 @@ def from_pretrained( ) model_id, revision = model_id.split("@") + config_folder = ( + subfolder if find_files_matching_pattern(model_id, cls.config_name)[0].parent == subfolder else "" + ) + library_name = TasksManager.infer_library_from_model( - model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token + model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token ) if library_name == "timm": config = PretrainedConfig.from_pretrained( - model_id, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token + model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token ) if config is None: - if os.path.isdir(os.path.join(model_id, subfolder)) and cls.config_name == CONFIG_NAME: - if CONFIG_NAME in os.listdir(os.path.join(model_id, subfolder)): - config = AutoConfig.from_pretrained( - os.path.join(model_id, subfolder), trust_remote_code=trust_remote_code - ) - elif CONFIG_NAME in os.listdir(model_id): + if os.path.isdir(os.path.join(model_id, config_folder)) and cls.config_name == CONFIG_NAME: + if CONFIG_NAME in os.listdir(os.path.join(model_id, config_folder)): config = AutoConfig.from_pretrained( - os.path.join(model_id, CONFIG_NAME), trust_remote_code=trust_remote_code - ) - logger.info( - f"config.json not found in the specified subfolder {subfolder}. Using the top level config.json." + os.path.join(model_id, config_folder), trust_remote_code=trust_remote_code ) else: raise OSError(f"config.json not found in {model_id} local folder") @@ -411,7 +409,7 @@ def from_pretrained( cache_dir=cache_dir, token=token, force_download=force_download, - subfolder=subfolder, + subfolder=config_folder, trust_remote_code=trust_remote_code, ) elif isinstance(config, (str, os.PathLike)): @@ -421,7 +419,7 @@ def from_pretrained( cache_dir=cache_dir, token=token, force_download=force_download, - subfolder=subfolder, + subfolder=config_folder, trust_remote_code=trust_remote_code, ) From a9b7e84f96d8ff27b00a36e59d6ec590739f7190 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 9 Oct 2024 15:23:36 +0200 Subject: [PATCH 2/7] fix --- optimum/modeling_base.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 2147660bf9..0fb5668b84 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -381,9 +381,13 @@ def from_pretrained( ) model_id, revision = model_id.split("@") - config_folder = ( - subfolder if find_files_matching_pattern(model_id, cls.config_name)[0].parent == subfolder else "" - ) + if len(find_files_matching_pattern(model_id, cls.config_name, subfolder=subfolder)) == 0: + logger.info( + f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}." + ) + config_folder = "" + else: + config_folder = subfolder library_name = TasksManager.infer_library_from_model( model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token From 5e51e368bedc813fe1373af23a9d0b72de6af608 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 9 Oct 2024 16:26:50 +0200 Subject: [PATCH 3/7] fix for offline mode --- optimum/modeling_base.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 0fb5668b84..ad1ec06446 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -24,6 +24,7 @@ from huggingface_hub import create_repo, upload_file from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE +from huggingface_hub.errors import OfflineModeIsEnabled from transformers import AutoConfig, PretrainedConfig, add_start_docstrings from .exporters import TasksManager @@ -381,13 +382,16 @@ def from_pretrained( ) model_id, revision = model_id.split("@") - if len(find_files_matching_pattern(model_id, cls.config_name, subfolder=subfolder)) == 0: - logger.info( - f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}." - ) - config_folder = "" - else: - config_folder = subfolder + # TODO: enable this when offline + config_folder = subfolder + try: + if len(find_files_matching_pattern(model_id, cls.config_name, subfolder=subfolder)) == 0: + logger.info( + f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}." + ) + config_folder = "" + except OfflineModeIsEnabled: + pass library_name = TasksManager.infer_library_from_model( model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token From 2837cab34a6553ac411d95d1fea3f56580ffe164 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 9 Oct 2024 16:28:01 +0200 Subject: [PATCH 4/7] add log --- optimum/modeling_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index ad1ec06446..275bf65cc0 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -382,7 +382,6 @@ def from_pretrained( ) model_id, revision = model_id.split("@") - # TODO: enable this when offline config_folder = subfolder try: if len(find_files_matching_pattern(model_id, cls.config_name, subfolder=subfolder)) == 0: @@ -391,7 +390,8 @@ def from_pretrained( ) config_folder = "" except OfflineModeIsEnabled: - pass + # TODO: enable this for offline mode by checking the cache + logger.info(f"Offline mode enabled, the {cls.config_name} is expected to be in the subfolder {subfolder}.") library_name = TasksManager.infer_library_from_model( model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token From 82f26997ab6af155982628549243ef8e6bf4bb1b Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 9 Oct 2024 18:26:29 +0200 Subject: [PATCH 5/7] fix --- optimum/modeling_base.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py index 275bf65cc0..48c738514a 100644 --- a/optimum/modeling_base.py +++ b/optimum/modeling_base.py @@ -24,12 +24,10 @@ from huggingface_hub import create_repo, upload_file from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE -from huggingface_hub.errors import OfflineModeIsEnabled from transformers import AutoConfig, PretrainedConfig, add_start_docstrings from .exporters import TasksManager from .utils import CONFIG_NAME -from .utils.file_utils import find_files_matching_pattern if TYPE_CHECKING: @@ -382,16 +380,20 @@ def from_pretrained( ) model_id, revision = model_id.split("@") + all_files, _ = TasksManager.get_model_files( + model_id, + subfolder=subfolder, + cache_dir=cache_dir, + revision=revision, + token=token, + ) + config_folder = subfolder - try: - if len(find_files_matching_pattern(model_id, cls.config_name, subfolder=subfolder)) == 0: - logger.info( - f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}." - ) - config_folder = "" - except OfflineModeIsEnabled: - # TODO: enable this for offline mode by checking the cache - logger.info(f"Offline mode enabled, the {cls.config_name} is expected to be in the subfolder {subfolder}.") + if cls.config_name not in all_files: + logger.info( + f"{cls.config_name} not found in the specified subfolder {subfolder}. Using the top level {cls.config_name}." + ) + config_folder = "" library_name = TasksManager.infer_library_from_model( model_id, subfolder=config_folder, revision=revision, cache_dir=cache_dir, token=token From 9b6c2218bd08cd0e241daef581153e96ce164065 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 9 Oct 2024 19:41:53 +0200 Subject: [PATCH 6/7] enable load local model in subfolder --- optimum/onnxruntime/modeling_ort.py | 6 ++---- tests/onnxruntime/test_modeling.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index 9b29afa566..ce1d68536a 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -510,13 +510,12 @@ def _from_pretrained( if file_name is None: if model_path.is_dir(): - onnx_files = list(model_path.glob("*.onnx")) + onnx_files = list((model_path / subfolder).glob("*.onnx")) else: repo_files, _ = TasksManager.get_model_files( model_id, revision=revision, cache_dir=cache_dir, token=token ) repo_files = map(Path, repo_files) - pattern = "*.onnx" if subfolder == "" else f"{subfolder}/*.onnx" onnx_files = [p for p in repo_files if p.match(pattern)] @@ -983,10 +982,9 @@ def _cached_file( token = use_auth_token model_path = Path(model_path) - # locates a file in a local folder and repo, downloads and cache it if necessary. if model_path.is_dir(): - model_cache_path = model_path / file_name + model_cache_path = model_path / subfolder / file_name preprocessors = maybe_load_preprocessors(model_path.as_posix()) else: model_cache_path = hf_hub_download( diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 665f253c48..abf508a80c 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -28,6 +28,7 @@ import requests import timm import torch +from huggingface_hub import HfApi from huggingface_hub.constants import default_cache_path from parameterized import parameterized from PIL import Image @@ -1263,6 +1264,19 @@ def test_trust_remote_code(self): torch.allclose(pt_logits, ort_logits, atol=1e-4), f" Maxdiff: {torch.abs(pt_logits - ort_logits).max()}" ) + @parameterized.expand(("", "onnx")) + def test_loading_with_config_in_root(self, subfolder): + # config.json file in the root directory and not in the subfolder + model_id = "sentence-transformers-testing/stsb-bert-tiny-onnx" + # hub model + ORTModelForFeatureExtraction.from_pretrained(model_id, subfolder=subfolder, export=subfolder == "") + # local model + api = HfApi() + with tempfile.TemporaryDirectory() as tmpdirname: + local_dir = Path(tmpdirname) / "model" + api.snapshot_download(repo_id=model_id, local_dir=local_dir) + ORTModelForFeatureExtraction.from_pretrained(local_dir, subfolder=subfolder, export=subfolder == "") + class ORTModelForQuestionAnsweringIntegrationTest(ORTModelTestMixin): SUPPORTED_ARCHITECTURES = [ From 2e6ed87204ff46f8c73f584552bad42993fe2658 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 10 Oct 2024 10:44:31 +0200 Subject: [PATCH 7/7] fix windows --- tests/onnxruntime/test_modeling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index abf508a80c..501c7dac24 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -1265,7 +1265,7 @@ def test_trust_remote_code(self): ) @parameterized.expand(("", "onnx")) - def test_loading_with_config_in_root(self, subfolder): + def test_loading_with_config_not_from_subfolder(self, subfolder): # config.json file in the root directory and not in the subfolder model_id = "sentence-transformers-testing/stsb-bert-tiny-onnx" # hub model @@ -1276,6 +1276,7 @@ def test_loading_with_config_in_root(self, subfolder): local_dir = Path(tmpdirname) / "model" api.snapshot_download(repo_id=model_id, local_dir=local_dir) ORTModelForFeatureExtraction.from_pretrained(local_dir, subfolder=subfolder, export=subfolder == "") + remove_directory(tmpdirname) class ORTModelForQuestionAnsweringIntegrationTest(ORTModelTestMixin):