Add Hub verification token to evaluation metadata #1142

Merged · 13 commits · Nov 4, 2022
10 changes: 7 additions & 3 deletions src/huggingface_hub/repocard.py
@@ -27,6 +27,7 @@

from .constants import REPOCARD_NAME
from .utils import EntryNotFoundError, validate_hf_hub_args
from .utils._deprecation import _deprecate_positional_args
from .utils.logging import get_logger


@@ -533,6 +534,7 @@ def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
readme.close()


@_deprecate_positional_args(version="0.12")
def metadata_eval_result(
model_pretty_name: str,
task_pretty_name: str,
@@ -547,6 +549,7 @@ def metadata_eval_result(
dataset_config: Optional[str] = None,
dataset_split: Optional[str] = None,
dataset_revision: Optional[str] = None,
metrics_verification_token: Optional[str] = None,
) -> Dict:
"""
Creates a metadata dict with the result from a model evaluated on a dataset.
@@ -572,16 +575,16 @@
The name of the metric configuration used in `load_metric()`.
Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
metrics_verified (`bool`, *optional*, defaults to `False`):
If true, indicates that evaluation was generated by Hugging Face (vs. self-reported).
If a user tries to push self-reported metric results with verified=True, the push
will be rejected.
Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
dataset_config (`str`, *optional*):
Example: fr. The name of the dataset configuration used in `load_dataset()`.
dataset_split (`str`, *optional*):
Example: test. The name of the dataset split used in `load_dataset()`.
dataset_revision (`str`, *optional*):
Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
used in `load_dataset()`.
metrics_verification_token (`str`, *optional*):
A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.

Returns:
`dict`: a metadata dict with the result from a model evaluated on a dataset.
@@ -649,6 +652,7 @@ def metadata_eval_result(
dataset_type=dataset_id,
metric_config=metrics_config,
verified=metrics_verified,
verifyToken=metrics_verification_token,
dataset_config=dataset_config,
dataset_split=dataset_split,
dataset_revision=dataset_revision,
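For context, a minimal usage sketch of `metadata_eval_result` after this change. All values are made up for illustration, and with the new `_deprecate_positional_args` decorator, arguments should be passed as keywords:

```python
from huggingface_hub import metadata_eval_result

# Positional arguments are deprecated as of the
# @_deprecate_positional_args(version="0.12") decorator above,
# so everything is passed by keyword.
metadata = metadata_eval_result(
    model_pretty_name="RoBERTa fine-tuned on ReactionGIF",  # illustrative
    task_pretty_name="Text Classification",
    task_id="text-classification",
    metrics_pretty_name="Accuracy",
    metrics_id="accuracy",
    metrics_value=0.2662,
    dataset_pretty_name="ReactionGIF",
    dataset_id="julien-c/reactiongif",  # illustrative repo id
    dataset_config="default",
    dataset_split="test",
    # Both verification fields are set by Hugging Face's evaluation
    # service; a self-reported result should leave them unset.
    metrics_verified=False,
    metrics_verification_token=None,
)
```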
12 changes: 10 additions & 2 deletions src/huggingface_hub/repocard_data.py
@@ -51,7 +51,9 @@ class EvalResult:
metric_args (`Dict[str, Any]`, *optional*):
The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
verified (`bool`, *optional*):
If true, indicates that evaluation was generated by Hugging Face (vs. self-reported).
Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
verifyToken (`str`, *optional*):
A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
"""

# Required
@@ -113,9 +115,12 @@ class EvalResult:
# Example for `bleu`: max_order: 4
metric_args: Optional[Dict[str, Any]] = None

# If true, indicates that evaluation was generated by Hugging Face (vs. self-reported).
# Automatically computed, do not set. Dynamically overridden by Hugging Face in API calls to indicate if evaluation was verified by Hugging Face.
verified: Optional[bool] = None

# Generated by Hugging Face to verify the results are valid.
verifyToken: Optional[str] = None
Contributor:
@lewtun Sorry to ask for a last change, but would it be possible to use snake_case here? I think keeping consistency within the hfh parameters is more important than keeping consistency with the server naming.

This would require changing both the getter and the setter in

# in model_index_to_eval_results
verify_token = metric.get("verifyToken")
(...)
verify_token=verify_token,

and

# in eval_results_to_model_index
"verifyToken": result.verify_token,

Member Author:

Thanks, I agree snake_case is nicer :)

Fixed in f0d1cc4 and 6995d2a



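Putting the `EvalResult` change together, here is a minimal construction sketch at the revision shown in this diff, where the attribute is still the camelCase `verifyToken` (commits f0d1cc4 and 6995d2a later rename it to `verify_token`, per the review above). All values are illustrative:

```python
from huggingface_hub.repocard_data import EvalResult

result = EvalResult(
    task_type="text-classification",  # required
    dataset_type="imdb",              # required
    dataset_name="IMDb",              # required
    metric_type="accuracy",           # required
    metric_value=0.93,                # required
    # Provenance fields: populated by Hugging Face's evaluation service,
    # never set by hand. The token is a JWT the Hub can verify.
    verified=True,
    verifyToken="<jwt-issued-by-the-evaluation-service>",  # illustrative
)
```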
@dataclass
class CardData:
@@ -416,6 +421,7 @@ def model_index_to_eval_results(
metric_args = metric.get("args")
metric_config = metric.get("config")
verified = metric.get("verified")
verifyToken = metric.get("verifyToken")

eval_result = EvalResult(
task_type=task_type, # Required
@@ -432,6 +438,7 @@
metric_args=metric_args,
metric_config=metric_config,
verified=verified,
verifyToken=verifyToken,
)
eval_results.append(eval_result)
return name, eval_results
@@ -521,6 +528,7 @@ def eval_results_to_model_index(
"config": result.metric_config,
"args": result.metric_args,
"verified": result.verified,
"verifyToken": result.verifyToken,
}
for result in results
],
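Finally, a rough round-trip sketch under the same naming caveat, showing that the new `verifyToken` key flows through both converters at this revision (the model-index dict below is illustrative):

```python
from huggingface_hub.repocard_data import (
    eval_results_to_model_index,
    model_index_to_eval_results,
)

model_index = [
    {
        "name": "my-model",  # illustrative
        "results": [
            {
                "task": {"type": "text-classification"},
                "dataset": {"type": "imdb", "name": "IMDb"},
                "metrics": [
                    {
                        "type": "accuracy",
                        "value": 0.93,
                        "verified": True,
                        "verifyToken": "<jwt>",  # illustrative
                    }
                ],
            }
        ],
    }
]

# Parse model-index metadata into EvalResult objects...
name, eval_results = model_index_to_eval_results(model_index)

# ...and serialize back; the token survives the round trip.
index = eval_results_to_model_index(name, eval_results)
assert index[0]["results"][0]["metrics"][0]["verifyToken"] == "<jwt>"
```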