Skip to content

Commit

Permalink
FIX [Trainer / tags]: Fix trainer + tags when users do not pass `"t…
Browse files Browse the repository at this point in the history
…ags"` to `trainer.push_to_hub()` (#29009)

* fix trainer tags

* add test
  • Loading branch information
younesbelkada authored and Ita Zaporozhets committed May 14, 2024
1 parent e097535 commit 0aa457b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
5 changes: 4 additions & 1 deletion src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3842,7 +3842,10 @@ def push_to_hub(self, commit_message: Optional[str] = "End of training", blockin
# Add the model's own tags when the model already has some, whether or not users pass
# a "tags" argument to `push_to_hub`, so that the Trainer automatically handles internal
# tags from all models, since Trainer does not call `model.push_to_hub`.
if "tags" in kwargs and getattr(self.model, "model_tags", None) is not None:
if getattr(self.model, "model_tags", None) is not None:
if "tags" not in kwargs:
kwargs["tags"] = []

# If it is a string, convert it to a list
if isinstance(kwargs["tags"], str):
kwargs["tags"] = [kwargs["tags"]]
Expand Down
35 changes: 33 additions & 2 deletions tests/trainer/test_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from unittest.mock import Mock, patch

import numpy as np
from huggingface_hub import HfFolder, delete_repo, list_repo_commits, list_repo_files
from huggingface_hub import HfFolder, ModelCard, delete_repo, list_repo_commits, list_repo_files
from parameterized import parameterized
from requests.exceptions import HTTPError

Expand Down Expand Up @@ -2564,7 +2564,13 @@ def setUpClass(cls):

@classmethod
def tearDownClass(cls):
for model in ["test-trainer", "test-trainer-epoch", "test-trainer-step", "test-trainer-tensorboard"]:
for model in [
"test-trainer",
"test-trainer-epoch",
"test-trainer-step",
"test-trainer-tensorboard",
"test-trainer-tags",
]:
try:
delete_repo(token=cls._token, repo_id=model)
except HTTPError:
Expand Down Expand Up @@ -2695,6 +2701,31 @@ def test_push_to_hub_with_tensorboard_logs(self):

assert found_log is True, "No tensorboard log found in repo"

def test_push_to_hub_tags(self):
    # Checks that `trainer.push_to_hub()` adds the model's own tag to the pushed
    # model card without having to pass `tags` to `push_to_hub`.
    with tempfile.TemporaryDirectory() as tmp_dir:
        trainer = get_regression_trainer(
            output_dir=os.path.join(tmp_dir, "test-trainer-tags"),
            push_to_hub=True,
            hub_token=self._token,
        )

        # Tag the model directly; `push_to_hub` below is deliberately called
        # with no `tags` keyword argument.
        trainer.model.add_model_tags(["test-trainer-tags"])

        url = trainer.push_to_hub()

        # Extract repo_name ("<user>/<repo>") from the returned url.
        re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url)
        # assertIsNotNone / assertIn report the offending values on failure,
        # unlike assertTrue on a pre-evaluated boolean.
        self.assertIsNotNone(re_search)
        repo_name = re_search.group(1)

        self.assertEqual(repo_name, f"{USER}/test-trainer-tags")

        model_card = ModelCard.load(repo_name)
        self.assertIn("test-trainer-tags", model_card.data.tags)


@require_torch
@require_optuna
Expand Down

0 comments on commit 0aa457b

Please sign in to comment.