Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MNT Fixes for hf_hub v0.11.0 #213

Merged
merged 10 commits into from
Nov 17, 2022
75 changes: 38 additions & 37 deletions skops/card/_model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from reprlib import Repr
from typing import Any, Optional, Union

from huggingface_hub import CardData, ModelCard
from huggingface_hub import ModelCard, ModelCardData
from sklearn.utils import estimator_html_repr
from tabulate import tabulate # type: ignore

Expand Down Expand Up @@ -107,13 +107,13 @@ def __repr__(self) -> str:
return f"Table({nrows}x{ncols})"


def metadata_from_config(config_path: Union[str, Path]) -> CardData:
"""Construct a ``CardData`` object from a ``config.json`` file.
def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData:
"""Construct a ``ModelCardData`` object from a ``config.json`` file.

Most information needed for the metadata section of a ``README.md`` file on
Hugging Face Hub is included in the ``config.json`` file. This utility
function constructs a :class:`huggingface_hub.CardData` object which can
then be passed to the :class:`~skops.card.Card` object.
function constructs a :class:`huggingface_hub.ModelCardData` object which
can then be passed to the :class:`~skops.card.Card` object.

This method populates the following attributes of the instance:

Expand All @@ -133,8 +133,8 @@ def metadata_from_config(config_path: Union[str, Path]) -> CardData:

Returns
-------
card_data: huggingface_hub.CardData
:class:`huggingface_hub.CardData` object.
card_data: huggingface_hub.ModelCardData
:class:`huggingface_hub.ModelCardData` object.

"""
config_path = Path(config_path)
Expand All @@ -144,19 +144,19 @@ def metadata_from_config(config_path: Union[str, Path]) -> CardData:
with open(config_path) as f:
config = json.load(f)

card_data = CardData()
card_data = ModelCardData()
card_data.library_name = "sklearn"
card_data.tags = ["sklearn", "skops"]
task = config.get("sklearn", {}).get("task", None)
if task:
card_data.tags += [task]
card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file")
card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file") # type: ignore
example_input = config.get("sklearn", {}).get("example_input", None)
# Documentation on what the widget expects:
# https://huggingface.co/docs/hub/models-widgets-examples
if example_input:
if "tabular" in task:
card_data.widget = {"structuredData": example_input}
card_data.widget = {"structuredData": example_input} # type: ignore
# TODO: add text data example here.

return card_data
Expand All @@ -178,9 +178,10 @@ class Card:
model_diagram: bool, default=True
Set to True if model diagram should be plotted in the card.

metadata: CardData, optional
``CardData`` object. The contents of this object are saved as metadata
at the beginning of the output file, and used by Hugging Face Hub.
metadata: ModelCardData, optional
:class:`huggingface_hub.ModelCardData` object. The contents of this
object are saved as metadata at the beginning of the output file, and
used by Hugging Face Hub.

You can use :func:`~skops.card.metadata_from_config` to create an
instance pre-populated with necessary information based on the contents
Expand All @@ -192,7 +193,7 @@ class Card:
model: estimator object
The scikit-learn compatible model that will be documented.

metadata: CardData
metadata: ModelCardData
Metadata to be stored at the beginning of the saved model card, as
metadata to be understood by the Hugging Face Hub.

Expand Down Expand Up @@ -246,20 +247,21 @@ class Card:
confusion_matrix='...confusion_matrix.png',
)
>>> model_card.save(tmp_path / "README.md")

"""

def __init__(
self,
model: Any,
model_diagram: bool = True,
metadata: Optional[CardData] = None,
metadata: Optional[ModelCardData] = None,
) -> None:
self.model = model
self.model_diagram = model_diagram
self._eval_results = {} # type: ignore
self._template_sections: dict[str, str] = {}
self._extra_sections: list[tuple[str, Any]] = []
self.metadata = metadata or CardData()
self.metadata = metadata or ModelCardData()

def add(self, **kwargs: str) -> "Card":
"""Takes values to fill model card template.
Expand Down Expand Up @@ -389,27 +391,26 @@ def _generate_card(self) -> ModelCard:
template_sections = copy.deepcopy(self._template_sections)

if self.metadata:
if self.metadata.to_dict().get("model_file"):
model_file = self.metadata.to_dict().get("model_file")
if model_file.endswith(".skops"):
template_sections["get_started_code"] = (
"from skops.io import load\nimport json\n"
"import pandas as pd\n"
f'clf = load("{model_file}")\n'
'with open("config.json") as f:\n '
" config ="
" json.load(f)\n"
'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
)
else:
template_sections["get_started_code"] = (
"import joblib\nimport json\nimport pandas as pd\nclf ="
f' joblib.load({model_file})\nwith open("config.json") as'
" f:\n "
" config ="
" json.load(f)\n"
'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
)
model_file = self.metadata.to_dict().get("model_file")
if model_file and model_file.endswith(".skops"):
template_sections["get_started_code"] = (
"from skops.io import load\nimport json\n"
"import pandas as pd\n"
f'clf = load("{model_file}")\n'
'with open("config.json") as f:\n '
" config ="
" json.load(f)\n"
'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
)
elif model_file is not None:
template_sections["get_started_code"] = (
"import joblib\nimport json\nimport pandas as pd\nclf ="
f' joblib.load({model_file})\nwith open("config.json") as'
" f:\n "
" config ="
" json.load(f)\n"
'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
)
if self.model_diagram is True:
model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(self.model)))
if model_plot_div.count("sk-top-container") == 1:
Expand Down
4 changes: 3 additions & 1 deletion skops/hub_utils/_hf_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,9 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An
Also note that if the model repo is private, the inference API would not be
available.
"""
model_info = HfApi().model_info(repo_id=repo_id, use_auth_token=token)
# TODO: the "type: ignore" should eventually become unnecessary when hf_hub
# is updated
model_info = HfApi().model_info(repo_id=repo_id, use_auth_token=token) # type: ignore
if not model_info.pipeline_tag:
raise ValueError(
f"Repo {repo_id} has no pipeline tag. You should set a valid 'task' in"
Expand Down