fix for custom pipeline configuration #29004

Merged
5 changes: 5 additions & 0 deletions src/transformers/configuration_utils.py
@@ -31,6 +31,7 @@
     CONFIG_NAME,
     PushToHubMixin,
     add_model_info_to_auto_map,
+    add_model_info_to_custom_pipelines,
     cached_file,
     copy_func,
     download_url,
@@ -729,6 +730,10 @@ def _get_config_dict(
             config_dict["auto_map"] = add_model_info_to_auto_map(
                 config_dict["auto_map"], pretrained_model_name_or_path
             )
+        if "custom_pipelines" in config_dict and not is_local:
+            config_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
+                config_dict["custom_pipelines"], pretrained_model_name_or_path
+            )
         return config_dict, kwargs

     @classmethod
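For context, this is roughly what _get_config_dict sees for a Hub repo that ships a custom pipeline, shown as the parsed Python dict (a sketch only; the task and class names are illustrative, borrowed from the test added below):

# parsed from the repo's config.json, before the new branch runs
config_dict = {
    "custom_pipelines": {
        "pair-classification": {
            "impl": "custom_pipeline.PairClassificationPipeline",  # bare module.Class
            "pt": ("AutoModelForSequenceClassification",),
            "tf": (),
        }
    },
}
# after add_model_info_to_custom_pipelines(config_dict["custom_pipelines"], "user/repo"),
# "impl" becomes "user/repo--custom_pipeline.PairClassificationPipeline"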
14 changes: 10 additions & 4 deletions src/transformers/feature_extraction_utils.py
@@ -31,6 +31,7 @@
     PushToHubMixin,
     TensorType,
     add_model_info_to_auto_map,
+    add_model_info_to_custom_pipelines,
     cached_file,
     copy_func,
     download_url,
@@ -539,10 +540,15 @@ def get_feature_extractor_dict(
                 f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}"
             )

-        if "auto_map" in feature_extractor_dict and not is_local:
-            feature_extractor_dict["auto_map"] = add_model_info_to_auto_map(
-                feature_extractor_dict["auto_map"], pretrained_model_name_or_path
-            )
+        if not is_local:
Collaborator:

Just curious: why do we only do this when not local, whereas before we did it for local and not local? 🤗

Contributor (Author):

What does is_local mean? Say I load a model like this:

from transformers import AutoModelForImageClassification

path = "./folder1/folder2/folder3"
model = AutoModelForImageClassification.from_pretrained(path, trust_remote_code=True)

If the path exists on my machine, then we load from there.

What if is_local is false? Then we fetch the model from the Hub and add the tag.

So the tag is only added when calling a custom model from the Hugging Face Hub? Yes; there is no need to add the tag when the model comes from the local machine. (See the short sketch after this hunk for what the tag looks like.)

Hope this answers your questions 🤗

if "auto_map" in feature_extractor_dict:
feature_extractor_dict["auto_map"] = add_model_info_to_auto_map(
feature_extractor_dict["auto_map"], pretrained_model_name_or_path
)
if "custom_pipelines" in feature_extractor_dict:
feature_extractor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
feature_extractor_dict["custom_pipelines"], pretrained_model_name_or_path
)

return feature_extractor_dict, kwargs

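As a concrete illustration of the answer above (a minimal sketch; the repo id and class name are hypothetical), loading a custom pipeline's model from the Hub rewrites the bare "impl" path into a repo-qualified one:

from transformers import AutoModelForSequenceClassification

# is_local is False here, so the custom_pipelines entry gets tagged on load
model = AutoModelForSequenceClassification.from_pretrained(
    "user/test-dynamic-pipeline", trust_remote_code=True
)
print(model.config.custom_pipelines["pair-classification"]["impl"])
# before tagging: "custom_pipeline.PairClassificationPipeline"
# after tagging:  "user/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline"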
15 changes: 10 additions & 5 deletions src/transformers/image_processing_utils.py
@@ -31,6 +31,7 @@
     IMAGE_PROCESSOR_NAME,
     PushToHubMixin,
     add_model_info_to_auto_map,
+    add_model_info_to_custom_pipelines,
     cached_file,
     copy_func,
     download_url,
@@ -375,11 +376,15 @@ def get_image_processor_dict(
                 f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}"
             )

-        if "auto_map" in image_processor_dict and not is_local:
-            image_processor_dict["auto_map"] = add_model_info_to_auto_map(
-                image_processor_dict["auto_map"], pretrained_model_name_or_path
-            )
-
+        if not is_local:
+            if "auto_map" in image_processor_dict:
+                image_processor_dict["auto_map"] = add_model_info_to_auto_map(
+                    image_processor_dict["auto_map"], pretrained_model_name_or_path
+                )
+            if "custom_pipelines" in image_processor_dict:
+                image_processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
+                    image_processor_dict["custom_pipelines"], pretrained_model_name_or_path
+                )
         return image_processor_dict, kwargs

     @classmethod
14 changes: 10 additions & 4 deletions src/transformers/processing_utils.py
@@ -30,6 +30,7 @@
     PROCESSOR_NAME,
     PushToHubMixin,
     add_model_info_to_auto_map,
+    add_model_info_to_custom_pipelines,
     cached_file,
     copy_func,
     direct_transformers_import,
@@ -355,10 +356,15 @@ def get_processor_dict(
         else:
             logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}")

-        if "auto_map" in processor_dict and not is_local:
-            processor_dict["auto_map"] = add_model_info_to_auto_map(
-                processor_dict["auto_map"], pretrained_model_name_or_path
-            )
+        if not is_local:
+            if "auto_map" in processor_dict:
+                processor_dict["auto_map"] = add_model_info_to_auto_map(
+                    processor_dict["auto_map"], pretrained_model_name_or_path
+                )
+            if "custom_pipelines" in processor_dict:
+                processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
+                    processor_dict["custom_pipelines"], pretrained_model_name_or_path
+                )

         return processor_dict, kwargs

20 changes: 13 additions & 7 deletions src/transformers/tokenization_utils_base.py
@@ -42,6 +42,7 @@
     TensorType,
     add_end_docstrings,
     add_model_info_to_auto_map,
+    add_model_info_to_custom_pipelines,
     cached_file,
     copy_func,
     download_url,
@@ -2152,13 +2153,18 @@ def _from_pretrained(
             config_tokenizer_class = None
             init_kwargs = init_configuration

-        if "auto_map" in init_kwargs and not _is_local:
-            # For backward compatibility with old format.
-            if isinstance(init_kwargs["auto_map"], (tuple, list)):
-                init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
-            init_kwargs["auto_map"] = add_model_info_to_auto_map(
-                init_kwargs["auto_map"], pretrained_model_name_or_path
-            )
+        if not _is_local:
+            if "auto_map" in init_kwargs:
+                # For backward compatibility with old format.
+                if isinstance(init_kwargs["auto_map"], (tuple, list)):
+                    init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
+                init_kwargs["auto_map"] = add_model_info_to_auto_map(
+                    init_kwargs["auto_map"], pretrained_model_name_or_path
+                )
+            if "custom_pipelines" in init_kwargs:
+                init_kwargs["custom_pipelines"] = add_model_info_to_custom_pipelines(
+                    init_kwargs["custom_pipelines"], pretrained_model_name_or_path
+                )

         if config_tokenizer_class is None:
             # Matt: This entire block is only used to decide if the tokenizer class matches the class in the repo.
1 change: 1 addition & 0 deletions src/transformers/utils/__init__.py
@@ -37,6 +37,7 @@
     PaddingStrategy,
     TensorType,
     add_model_info_to_auto_map,
+    add_model_info_to_custom_pipelines,
     cached_property,
     can_return_loss,
     expand_dims,
13 changes: 13 additions & 0 deletions src/transformers/utils/generic.py
@@ -717,6 +717,19 @@ def add_model_info_to_auto_map(auto_map, repo_id):
     return auto_map


+def add_model_info_to_custom_pipelines(custom_pipeline, repo_id):
+    """
+    Adds the information of the repo_id to a given custom pipeline.
+    """
+    # {custom_pipelines : {task: {"impl": "path.to.task"},...} }
+    for task in custom_pipeline.keys():
+        if "impl" in custom_pipeline[task]:
+            module = custom_pipeline[task]["impl"]
+            if "--" not in module:
+                custom_pipeline[task]["impl"] = f"{repo_id}--{module}"
+    return custom_pipeline
+
+
 def infer_framework(model_class):
     """
     Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant
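A quick sanity check of the new helper (a sketch assuming a transformers install that contains this change; the repo ids are made up):

from transformers.utils import add_model_info_to_custom_pipelines

custom_pipelines = {
    "pair-classification": {
        "impl": "custom_pipeline.PairClassificationPipeline",
        "pt": ("AutoModelForSequenceClassification",),
        "tf": (),
    }
}

tagged = add_model_info_to_custom_pipelines(custom_pipelines, "user/test-dynamic-pipeline")
print(tagged["pair-classification"]["impl"])
# user/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline

# the "--" guard makes the helper idempotent: an already-tagged entry is left alone
again = add_model_info_to_custom_pipelines(tagged, "someone/other-repo")
assert again["pair-classification"]["impl"] == tagged["pair-classification"]["impl"]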
18 changes: 18 additions & 0 deletions tests/pipelines/test_pipelines_common.py
@@ -882,6 +882,24 @@ def test_push_to_hub_dynamic_pipeline(self):
         # Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
         # dynamic module
         self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
+        # check for tag existence; the tag needs to be added when we are calling a custom pipeline from the hub
+        # useful for cases such as finetuning
+        self.assertDictEqual(
+            new_classifier.model.config.custom_pipelines,
+            {
+                "pair-classification": {
+                    "impl": f"{USER}/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline",
+                    "pt": ("AutoModelForSequenceClassification",),
+                    "tf": (),
+                }
+            },
+        )
+        # test that the pipeline still works after the model is finetuned
+        # (we are actually testing if the pipeline still works from the final repo)
+        # this is what the user/repo--module.class tag is used for
+        new_classifier.model.push_to_hub(repo_name=f"{USER}/test-pipeline-for-a-finetuned-model", token=self._token)
+        del new_classifier  # free up memory
+        new_classifier = pipeline(model=f"{USER}/test-pipeline-for-a-finetuned-model", trust_remote_code=True)

         results = classifier("I hate you", second_text="I love you")
         new_results = new_classifier("I hate you", second_text="I love you")
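For readers following along, this is the user-facing flow the new test assertions exercise (a sketch only; the repo ids are placeholders, and push_to_hub requires write access to the target repo):

from transformers import pipeline

# loading from the hub tags config.custom_pipelines with "user/repo--module.Class"
classifier = pipeline(model="user/test-dynamic-pipeline", trust_remote_code=True)

# after fine-tuning, push the model to a new repo; the tagged impl path travels
# with the config, so the new repo can still resolve the custom pipeline code
classifier.model.push_to_hub("user/test-pipeline-for-a-finetuned-model")

# the pipeline loads from the fine-tuned repo without re-declaring the custom code
finetuned = pipeline(model="user/test-pipeline-for-a-finetuned-model", trust_remote_code=True)
print(finetuned("I hate you", second_text="I love you"))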