FIX overwriting metadata when both verified and unverified reported values #1186

Merged · 2 commits · Nov 15, 2022 · showing changes from 1 commit
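For context, a minimal sketch of the user-facing call pattern from issue #1185 that this PR targets. The repo id and token below are placeholders, not values from the PR; the only assumption is a model card that already lists both a self-reported and a "verified" value for the same metric.

```python
from huggingface_hub import ModelCard, metadata_update

# Hypothetical repo id: any model whose card lists both a self-reported and a
# "verified" value for the same metric reproduces the scenario from issue #1185.
repo_id = "user/model-with-verified-metrics"

card = ModelCard.load(repo_id)
metadata = card.data.to_dict()

# Before this fix, re-pushing the card's own metadata collapsed the two metric
# entries into one; afterwards both the verified and the self-reported value survive.
metadata_update(repo_id, metadata=metadata, overwrite=True, token="hf_xxx")  # placeholder token
```
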
41 changes: 16 additions & 25 deletions src/huggingface_hub/repocard.py
@@ -788,47 +788,38 @@ def metadata_update(
else:
existing_results = card.data.eval_results

# Iterate over new results
# Iterate over existing results
# If both results describe the same metric but value is different:
# If overwrite=True: overwrite the metric value
# Else: raise ValueError
# Else: append new result to existing ones.
for new_result in new_results:
result_found = False
for existing_result_index, existing_result in enumerate(
existing_results
):
if all(
[
new_result.dataset_name == existing_result.dataset_name,
new_result.dataset_type == existing_result.dataset_type,
new_result.task_type == existing_result.task_type,
new_result.task_name == existing_result.task_name,
new_result.metric_name == existing_result.metric_name,
new_result.metric_type == existing_result.metric_type,
]
):
if (
new_result.metric_value != existing_result.metric_value
and not overwrite
):
existing_str = (
f"name: {new_result.metric_name}, type:"
f" {new_result.metric_type}"
)
for existing_result in existing_results:
if new_result.is_equal_except_value(existing_result):
if new_result != existing_result and not overwrite:
raise ValueError(
"You passed a new value for the existing metric"
f" '{existing_str}'. Set `overwrite=True` to"
" overwrite existing metrics."
f" 'name: {new_result.metric_name}, type: "
f" {new_result.metric_type}'. Set `overwrite=True`"
" to overwrite existing metrics."
)
result_found = True
card.data.eval_results[existing_result_index] = new_result
existing_result.metric_value = new_result.metric_value
if not result_found:
card.data.eval_results.append(new_result)
else:
# Any metadata that is not a result metric
if (
hasattr(card.data, key)
and getattr(card.data, key) is not None
and not overwrite
and getattr(card.data, key) != value
):
raise ValueError(
f"""You passed a new value for the existing meta data field '{key}'. Set `overwrite=True` to overwrite existing metadata."""
f"You passed a new value for the existing meta data field '{key}'."
" Set `overwrite=True` to overwrite existing metadata."
)
else:
setattr(card.data, key, value)
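To make the behavioral change in `metadata_update` concrete, here is a minimal, self-contained sketch. `MiniResult` is a hypothetical stand-in for `EvalResult` with only a handful of its fields, and the "old rule" is reduced to three identifiers for brevity; this is an illustration of why comparing every field except `metric_value` keeps verified and self-reported results apart, not the library's actual code.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class MiniResult:
    # Hypothetical, stripped-down stand-in for EvalResult.
    task_type: str
    dataset_type: str
    metric_type: str
    metric_value: float
    verified: bool = False
    verify_token: Optional[str] = None

    def is_equal_except_value(self, other: "MiniResult") -> bool:
        # New rule: every field except metric_value must match, so a verified
        # result never matches a self-reported one.
        return all(
            getattr(self, key) == getattr(other, key)
            for key in self.__dict__
            if key != "metric_value"
        )

reported = MiniResult("text-classification", "julien-c/reactiongif", "accuracy", 0.266, verified=False)
verified = MiniResult("text-classification", "julien-c/reactiongif", "accuracy", 0.667, verified=True, verify_token="<jwt>")

# Old rule: only the task/dataset/metric identifiers were compared, so the two
# results "matched" and one silently overwrote the other during metadata_update.
old_rule_match = all(
    getattr(reported, key) == getattr(verified, key)
    for key in ("task_type", "dataset_type", "metric_type")
)
print(old_rule_match)                            # True  -> one entry clobbered the other
print(reported.is_equal_except_value(verified))  # False -> both entries are kept
```
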
12 changes: 12 additions & 0 deletions src/huggingface_hub/repocard_data.py
@@ -121,6 +121,18 @@ class EvalResult:
# A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
verify_token: Optional[str] = None

def is_equal_except_value(self, other: "EvalResult") -> bool:
"""
Return True if `self` and `other` describe exactly the same metric but with a
different value.
"""
for key, _ in self.__dict__.items():
if key == "metric_value":
continue
if getattr(self, key) != getattr(other, key):
return False
return True


@dataclass
class CardData:
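A quick illustration of how the new helper behaves. The constructor arguments below are inferred from the `EvalResult` dataclass fields and may differ slightly between versions, so treat this as a sketch rather than exact library usage.

```python
from huggingface_hub.repocard_data import EvalResult

base = dict(
    task_type="text-classification",
    dataset_type="julien-c/reactiongif",
    dataset_name="ReactionGIF",
    metric_type="accuracy",
)

reported = EvalResult(metric_value=0.266, verified=False, **base)
re_reported = EvalResult(metric_value=0.286, verified=False, **base)
verified = EvalResult(metric_value=0.667, verified=True, **base)

# Same metric, only the value differs -> a candidate for overwriting.
print(reported.is_equal_except_value(re_reported))  # True
# `verified` differs -> treated as a distinct result, both entries are kept.
print(reported.is_equal_except_value(verified))     # False
```
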
133 changes: 85 additions & 48 deletions tests/test_repocard.py
@@ -136,6 +136,43 @@
---
"""

DUMMY_MODELCARD_EVAL_RESULT_BOTH_VERIFIED_AND_UNVERIFIED = """---
model-index:
- name: RoBERTa fine-tuned on ReactionGIF
results:
- task:
type: text-classification
name: Text Classification
dataset:
name: ReactionGIF
type: julien-c/reactiongif
config: default
split: test
metrics:
- type: accuracy
value: 0.2662102282047272
name: Accuracy
config: default
verified: false
- task:
type: text-classification
name: Text Classification
dataset:
name: ReactionGIF
type: julien-c/reactiongif
config: default
split: test
metrics:
- type: accuracy
value: 0.6666666666666666
name: Accuracy
config: default
verified: true
---

This is a test model card.
Review comment (Member):
Small suggestion to help community developers understand what this is about:

Suggested change:
- This is a test model card.
+ This is a test model card containing one self-reported metric and one "verified" metric in the format produced by Hugging Face's [model evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator).

"""

logger = logging.get_logger(__name__)

REPOCARD_DIR = os.path.join(
@@ -240,17 +277,18 @@ def setUpClass(cls):
def setUp(self) -> None:
self.repo_path = Path(tempfile.mkdtemp())
self.REPO_NAME = repo_name()
self._api.create_repo(f"{USER}/{self.REPO_NAME}")
self.repo_id = f"{USER}/{self.REPO_NAME}"
self._api.create_repo(self.repo_id)
self._api.upload_file(
path_or_fileobj=DUMMY_MODELCARD_EVAL_RESULT.encode(),
repo_id=f"{USER}/{self.REPO_NAME}",
repo_id=self.repo_id,
path_in_repo="README.md",
commit_message="Add README to main branch",
)

self.repo = Repository(
self.repo_path / self.REPO_NAME,
clone_from=f"{USER}/{self.REPO_NAME}",
clone_from=self.repo_id,
use_auth_token=self._token,
git_user="ci",
git_email="ci@dummy.com",
@@ -260,14 +298,12 @@ def setUp(self) -> None:
)

def tearDown(self) -> None:
self._api.delete_repo(repo_id=f"{self.REPO_NAME}")
self._api.delete_repo(repo_id=self.repo_id)
shutil.rmtree(self.repo_path)

def test_update_dataset_name(self):
new_datasets_data = {"datasets": ["test/test_dataset"]}
metadata_update(
f"{USER}/{self.REPO_NAME}", new_datasets_data, token=self._token
)
metadata_update(self.repo_id, new_datasets_data, token=self._token)

self.repo.git_pull()
updated_metadata = metadata_load(self.repo_path / self.REPO_NAME / "README.md")
@@ -280,9 +316,7 @@ def test_update_existing_result_with_overwrite(self):
new_metadata["model-index"][0]["results"][0]["metrics"][0][
"value"
] = 0.2862102282047272
metadata_update(
f"{USER}/{self.REPO_NAME}", new_metadata, token=self._token, overwrite=True
)
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

self.repo.git_pull()
updated_metadata = metadata_load(self.repo_path / self.REPO_NAME / "README.md")
@@ -293,14 +327,12 @@ def test_metadata_update_upstream(self):
new_metadata["model-index"][0]["results"][0]["metrics"][0]["value"] = 0.1

path = hf_hub_download(
f"{USER}/{self.REPO_NAME}",
self.repo_id,
filename=REPOCARD_NAME,
use_auth_token=self._token,
)

metadata_update(
f"{USER}/{self.REPO_NAME}", new_metadata, token=self._token, overwrite=True
)
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

self.assertNotEqual(metadata_load(path), new_metadata)
self.assertEqual(metadata_load(path), self.existing_metadata)
@@ -319,17 +351,12 @@ def test_update_existing_result_without_overwrite(self):
),
):
metadata_update(
f"{USER}/{self.REPO_NAME}",
new_metadata,
token=self._token,
overwrite=False,
self.repo_id, new_metadata, token=self._token, overwrite=False
)

def test_update_existing_field_without_overwrite(self):
new_datasets_data = {"datasets": "['test/test_dataset']"}
metadata_update(
f"{USER}/{self.REPO_NAME}", new_datasets_data, token=self._token
)
metadata_update(self.repo_id, new_datasets_data, token=self._token)

with pytest.raises(
ValueError,
@@ -340,7 +367,7 @@ def test_update_existing_field_without_overwrite(self):
):
new_datasets_data = {"datasets": "['test/test_dataset_2']"}
metadata_update(
f"{USER}/{self.REPO_NAME}",
self.repo_id,
new_datasets_data,
token=self._token,
overwrite=False,
@@ -362,9 +389,7 @@ def test_update_new_result_existing_dataset(self):
dataset_split="test",
)

metadata_update(
f"{USER}/{self.REPO_NAME}", new_result, token=self._token, overwrite=False
)
metadata_update(self.repo_id, new_result, token=self._token, overwrite=False)

expected_metadata = copy.deepcopy(self.existing_metadata)
expected_metadata["model-index"][0]["results"][0]["metrics"].append(
@@ -391,9 +416,7 @@ def test_update_new_result_new_dataset(self):
dataset_split="test",
)

metadata_update(
f"{USER}/{self.REPO_NAME}", new_result, token=self._token, overwrite=False
)
metadata_update(self.repo_id, new_result, token=self._token, overwrite=False)

expected_metadata = copy.deepcopy(self.existing_metadata)
expected_metadata["model-index"][0]["results"].append(
@@ -412,7 +435,7 @@ def test_update_metadata_on_empty_text_content(self) -> None:
with self.repo.commit("Add README to main branch"):
with open("README.md", "w") as f:
f.write(DUMMY_MODELCARD_NO_TEXT_CONTENT)
metadata_update(f"{USER}/{self.REPO_NAME}", {"tag": "test"}, token=self._token)
metadata_update(self.repo_id, {"tag": "test"}, token=self._token)

# Check update went fine
self.repo.git_pull()
@@ -426,43 +449,57 @@ def test_update_with_existing_name(self):
new_metadata["model-index"][0]["results"][0]["metrics"][0][
"value"
] = 0.2862102282047272
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

metadata_update(
f"{USER}/{self.REPO_NAME}",
new_metadata,
token=self._token,
overwrite=True,
)

card_data = ModelCard.load(f"{USER}/{self.REPO_NAME}", token=self._token)

card_data = ModelCard.load(self.repo_id, token=self._token)
self.assertEqual(
card_data.data.model_name, self.existing_metadata["model-index"][0]["name"]
)

def test_update_without_existing_name(self):

# delete existing metadata
self._api.upload_file(
path_or_fileobj="# Test".encode(),
repo_id=f"{USER}/{self.REPO_NAME}",
repo_id=self.repo_id,
path_in_repo="README.md",
commit_message="Add README to main branch",
)

new_metadata = copy.deepcopy(self.existing_metadata)
new_metadata["model-index"][0].pop("name")

metadata_update(
f"{USER}/{self.REPO_NAME}",
new_metadata,
token=self._token,
overwrite=True,
metadata_update(self.repo_id, new_metadata, token=self._token, overwrite=True)

card_data = ModelCard.load(self.repo_id, token=self._token)

self.assertEqual(card_data.data.model_name, self.repo_id)

def test_update_with_both_verified_and_unverified_metric(self):
"""Regression test for #1185.

See https://github.com/huggingface/huggingface_hub/issues/1185.
"""
self._api.upload_file(
path_or_fileobj=DUMMY_MODELCARD_EVAL_RESULT_BOTH_VERIFIED_AND_UNVERIFIED.encode(),
repo_id=self.repo_id,
path_in_repo="README.md",
)
card = ModelCard.load(self.repo_id)
metadata = card.data.to_dict()
metadata_update(self.repo_id, metadata=metadata, overwrite=True, token=TOKEN)

card_data = ModelCard.load(self.repo_id, token=self._token)

self.assertEqual(len(card_data.data.eval_results), 2)
first_result = card_data.data.eval_results[0]
second_result = card_data.data.eval_results[1]

card_data = ModelCard.load(f"{USER}/{self.REPO_NAME}", token=self._token)
# One is verified, the other not
self.assertFalse(first_result.verified)
self.assertTrue(second_result.verified)

self.assertEqual(card_data.data.model_name, f"{USER}/{self.REPO_NAME}")
# Result values are different
self.assertEqual(first_result.metric_value, 0.2662102282047272)
self.assertEqual(second_result.metric_value, 0.6666666666666666)


class TestMetadataUpdateOnMissingCard(unittest.TestCase):