From c191983240a99a30ec51352223468795a2868b0a Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 13:38:04 +0100 Subject: [PATCH 01/10] [CI inference] Testing hf_hub v0.11.0rc0 --- .github/workflows/build-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index d9aeb5fc..4d59c85a 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -55,6 +55,8 @@ jobs: - name: Install dependencies run: | pip install .[docs,tests] + pip uninstall huggingface_hub + pip install huggingface_hub=="0.11.0rc0" pip install black=="22.6.0" isort=="5.10.1" mypy=="0.981" pip uninstall --yes scikit-learn if [ ${{ matrix.sklearn_version }} == "nightly" ]; From a7d7bdf83d26f964b13cc888232909852fd39d5b Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 13:43:30 +0100 Subject: [PATCH 02/10] add --yes flag --- .github/workflows/build-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 4d59c85a..f8080ce8 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -55,7 +55,7 @@ jobs: - name: Install dependencies run: | pip install .[docs,tests] - pip uninstall huggingface_hub + pip uninstall --yes huggingface_hub pip install huggingface_hub=="0.11.0rc0" pip install black=="22.6.0" isort=="5.10.1" mypy=="0.981" pip uninstall --yes scikit-learn From 418643c5a31899679e62ca348bc56ae283cf0266 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 17:30:39 +0100 Subject: [PATCH 03/10] Fixes for mypy errors - Use ModelCardData instead of CardData - Use token instead of use_auth_token - If model_file is not found, default to joblib load instruction --- skops/card/_model_card.py | 34 ++++++++++++++++++---------------- skops/hub_utils/_hf_hub.py | 2 +- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git 
a/skops/card/_model_card.py b/skops/card/_model_card.py index 355fc630..6feec562 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -10,7 +10,7 @@ from reprlib import Repr from typing import Any, Optional, Union -from huggingface_hub import CardData, ModelCard +from huggingface_hub import ModelCard, ModelCardData from sklearn.utils import estimator_html_repr from tabulate import tabulate # type: ignore @@ -107,12 +107,12 @@ def __repr__(self) -> str: return f"Table({nrows}x{ncols})" -def metadata_from_config(config_path: Union[str, Path]) -> CardData: - """Construct a ``CardData`` object from a ``config.json`` file. +def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData: + """Construct a ``ModelCardData`` object from a ``config.json`` file. Most information needed for the metadata section of a ``README.md`` file on Hugging Face Hub is included in the ``config.json`` file. This utility - function constructs a :class:`huggingface_hub.CardData` object which can + function constructs a :class:`huggingface_hub.ModelCardData` object which can then be passed to the :class:`~skops.card.Card` object. This method populates the following attributes of the instance: @@ -133,8 +133,8 @@ def metadata_from_config(config_path: Union[str, Path]) -> CardData: Returns ------- - card_data: huggingface_hub.CardData - :class:`huggingface_hub.CardData` object. + card_data: huggingface_hub.ModelCardData + :class:`huggingface_hub.ModelCardData` object. 
""" config_path = Path(config_path) @@ -144,19 +144,19 @@ def metadata_from_config(config_path: Union[str, Path]) -> CardData: with open(config_path) as f: config = json.load(f) - card_data = CardData() + card_data = ModelCardData() card_data.library_name = "sklearn" card_data.tags = ["sklearn", "skops"] task = config.get("sklearn", {}).get("task", None) if task: card_data.tags += [task] - card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file") + card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file") # type: ignore example_input = config.get("sklearn", {}).get("example_input", None) # Documentation on what the widget expects: # https://huggingface.co/docs/hub/models-widgets-examples if example_input: if "tabular" in task: - card_data.widget = {"structuredData": example_input} + card_data.widget = {"structuredData": example_input} # type: ignore # TODO: add text data example here. return card_data @@ -178,9 +178,10 @@ class Card: model_diagram: bool, default=True Set to True if model diagram should be plotted in the card. - metadata: CardData, optional - ``CardData`` object. The contents of this object are saved as metadata - at the beginning of the output file, and used by Hugging Face Hub. + metadata: ModelCardData, optional + :class:`huggingface_hub.ModelCardData` object. The contents of this + object are saved as metadata at the beginning of the output file, and + used by Hugging Face Hub. You can use :func:`~skops.card.metadata_from_config` to create an instance pre-populated with necessary information based on the contents @@ -192,7 +193,7 @@ class Card: model: estimator object The scikit-learn compatible model that will be documented. - metadata: CardData + metadata: ModelCardData Metadata to be stored at the beginning of the saved model card, as metadata to be understood by the Hugging Face Hub. 
@@ -246,20 +247,21 @@ class Card: confusion_matrix='...confusion_matrix.png', ) >>> model_card.save(tmp_path / "README.md") + """ def __init__( self, model: Any, model_diagram: bool = True, - metadata: Optional[CardData] = None, + metadata: Optional[ModelCardData] = None, ) -> None: self.model = model self.model_diagram = model_diagram self._eval_results = {} # type: ignore self._template_sections: dict[str, str] = {} self._extra_sections: list[tuple[str, Any]] = [] - self.metadata = metadata or CardData() + self.metadata = metadata or ModelCardData() def add(self, **kwargs: str) -> "Card": """Takes values to fill model card template. @@ -391,7 +393,7 @@ def _generate_card(self) -> ModelCard: if self.metadata: if self.metadata.to_dict().get("model_file"): model_file = self.metadata.to_dict().get("model_file") - if model_file.endswith(".skops"): + if model_file and model_file.endswith(".skops"): template_sections["get_started_code"] = ( "from skops.io import load\nimport json\n" "import pandas as pd\n" diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 6d361972..5d7a629b 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -626,7 +626,7 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An Also note that if the model repo is private, the inference API would not be available. """ - model_info = HfApi().model_info(repo_id=repo_id, use_auth_token=token) + model_info = HfApi().model_info(repo_id=repo_id, token=token) if not model_info.pipeline_tag: raise ValueError( f"Repo {repo_id} has no pipeline tag. 
You should set a valid 'task' in" From 5326bc1d147f9ee49b59383d5e01a940f7059d6b Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 17:41:50 +0100 Subject: [PATCH 04/10] [CI inference] empty commit to trigger inf tests From b2272730a931e07ac84bbb979ec9c47d0174208c Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 18:30:32 +0100 Subject: [PATCH 05/10] [CI inference] Simplify code to check model_file --- skops/card/_model_card.py | 41 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 6feec562..915fa7d8 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -391,27 +391,26 @@ def _generate_card(self) -> ModelCard: template_sections = copy.deepcopy(self._template_sections) if self.metadata: - if self.metadata.to_dict().get("model_file"): - model_file = self.metadata.to_dict().get("model_file") - if model_file and model_file.endswith(".skops"): - template_sections["get_started_code"] = ( - "from skops.io import load\nimport json\n" - "import pandas as pd\n" - f'clf = load("{model_file}")\n' - 'with open("config.json") as f:\n ' - " config =" - " json.load(f)\n" - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' - ) - else: - template_sections["get_started_code"] = ( - "import joblib\nimport json\nimport pandas as pd\nclf =" - f' joblib.load({model_file})\nwith open("config.json") as' - " f:\n " - " config =" - " json.load(f)\n" - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' - ) + model_file = self.metadata.to_dict().get("model_file") + if model_file and model_file.endswith(".skops"): + template_sections["get_started_code"] = ( + "from skops.io import load\nimport json\n" + "import pandas as pd\n" + f'clf = load("{model_file}")\n' + 'with open("config.json") as f:\n ' + " config =" + " json.load(f)\n" + 
'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' + ) + elif model_file is not None: + template_sections["get_started_code"] = ( + "import joblib\nimport json\nimport pandas as pd\nclf =" + f' joblib.load({model_file})\nwith open("config.json") as' + " f:\n " + " config =" + " json.load(f)\n" + 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' + ) if self.model_diagram is True: model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(self.model))) if model_plot_div.count("sk-top-container") == 1: From e4d1ead522aa261b343848ec5ce48c86d0665502 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 19:33:23 +0100 Subject: [PATCH 06/10] More fixes --- .github/workflows/build-test.yml | 2 -- skops/hub_utils/_hf_hub.py | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f8080ce8..d9aeb5fc 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -55,8 +55,6 @@ jobs: - name: Install dependencies run: | pip install .[docs,tests] - pip uninstall --yes huggingface_hub - pip install huggingface_hub=="0.11.0rc0" pip install black=="22.6.0" isort=="5.10.1" mypy=="0.981" pip uninstall --yes scikit-learn if [ ${{ matrix.sklearn_version }} == "nightly" ]; diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 5d7a629b..07c6c323 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -626,7 +626,9 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An Also note that if the model repo is private, the inference API would not be available.
""" - model_info = HfApi().model_info(repo_id=repo_id, token=token) + # TODO: the "type: ignore" should eventually become unnecessary when hf_hub + # is updated + model_info = HfApi().model_info(repo_id=repo_id, use_auth_token=token) # type: ignore if not model_info.pipeline_tag: raise ValueError( f"Repo {repo_id} has no pipeline tag. You should set a valid 'task' in" From c3a018a25f0e97130ed6ee544bc010db77f046c8 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 19:34:05 +0100 Subject: [PATCH 07/10] Line break in docstring --- skops/card/_model_card.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 915fa7d8..e93cdabc 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -112,8 +112,8 @@ def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData: Most information needed for the metadata section of a ``README.md`` file on Hugging Face Hub is included in the ``config.json`` file. This utility - function constructs a :class:`huggingface_hub.ModelCardData` object which can - then be passed to the :class:`~skops.card.Card` object. + function constructs a :class:`huggingface_hub.ModelCardData` object which + can then be passed to the :class:`~skops.card.Card` object.
This method populates the following attributes of the instance: From 1c9d2b7d578954410a09485b5e2dacd8ecf1e0f0 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 14 Nov 2022 19:42:30 +0100 Subject: [PATCH 08/10] [CI inference] empty commit to trigger inf tests From bba30d052901378acabf97c927691e1464740760 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Thu, 17 Nov 2022 11:13:59 +0100 Subject: [PATCH 09/10] [CI inference] Test new hf_hub RC1 --- .github/workflows/build-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index d9aeb5fc..de867aa8 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -54,6 +54,7 @@ jobs: - name: Install dependencies run: | + pip install huggingface_hub=="0.11.0rc1" pip install .[docs,tests] pip install black=="22.6.0" isort=="5.10.1" mypy=="0.981" pip uninstall --yes scikit-learn From 74d4bf09c3968dd4fd92afb03e525c7926e07c7f Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Thu, 17 Nov 2022 11:27:49 +0100 Subject: [PATCH 10/10] Remove hf_hub RC install --- .github/workflows/build-test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index de867aa8..d9aeb5fc 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -54,7 +54,6 @@ jobs: - name: Install dependencies run: | - pip install huggingface_hub=="0.11.0rc1" pip install .[docs,tests] pip install black=="22.6.0" isort=="5.10.1" mypy=="0.981" pip uninstall --yes scikit-learn