Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MNT - Removes sklearn-intelex and updates changelog #420

Merged
merged 4 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ skops Changelog

v0.10
-----

- Removes Python 3.8 support and adds Python 3.12 support :pr:`418` by :user:`Thomas Lazarus <lazarust>`.
- Removes the shortcut to add `scikit-learn-intelex` as a dependency.
:pr:`420` by :user:`Thomas Lazarus <lazarust>`.

v0.9
----
Expand Down
5 changes: 0 additions & 5 deletions docs/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@ Examples of interactions with the Hugging Face Hub
- Text Classification:
:ref:`sphx_glr_auto_examples_plot_text_classification.py` is an example of using skops to serialize a text
classification model and create a model card and a Hugging Face Hub repository.
- Using Intel(R) Extension for scikit-learn:
:ref:`sphx_glr_auto_examples_plot_intelex.py` is an example of using
Intel(R) Extension for scikit-learn to speed up inference of classical
machine learning algorithms and how performance-optimized models work with
Hugging Face Hub.
- Long semi-realistic guide using the California Housing dataset:
:ref:`sphx_glr_auto_examples_plot_california_housing.py` is an exercise that
goes through a semi-realistic data science and machine learning task and
Expand Down
1 change: 0 additions & 1 deletion skops/_min_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# "tomli": ("1.1.0", "install", "python_full_version < '3.11.0a7'"),
dependent_packages = {
"scikit-learn": ("0.24", "install", None),
"scikit-learn-intelex": ("2021.7.1", "docs", None),
"huggingface_hub": ("0.17.0", "install", None),
"tabulate": ("0.8.8", "install", None),
"quantile-forest": ("1.0.0", "tests", None),
Expand Down
2 changes: 0 additions & 2 deletions skops/card/_model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@ def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData:
if task:
card_data.tags += [task]
card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file") # type: ignore
if config.get("sklearn", {}).get("use_intelex"):
card_data.tags.append("scikit-learn-intelex")

example_input = config.get("sklearn", {}).get("example_input", None)
# Documentation on what the widget expects:
Expand Down
32 changes: 0 additions & 32 deletions skops/card/tests/test_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,38 +1197,6 @@ def test_metadata_model_format_skops(
metadata = metadata_load(local_path=Path(destination_path) / "README.md")
assert metadata["model_format"] == "skops"

def test_metadata_tags_without_sklearn_intelex_tag(
    self, destination_path, iris_data, iris_pkl_file
):
    """Check that the intelex tag is absent when ``use_intelex`` is not passed."""
    features, _ = iris_data

    # ``use_intelex`` is deliberately omitted so the default applies.
    init_kwargs = {
        "model": iris_pkl_file,
        "requirements": [],
        "dst": destination_path,
        "task": "tabular-classification",
        "data": features,
    }
    hub_utils.init(**init_kwargs)

    tags = metadata_from_config(destination_path).tags
    assert "scikit-learn-intelex" not in tags

def test_metadata_tags_with_sklearn_intelex_tag(
    self, destination_path, iris_data, iris_pkl_file
):
    """Check that the intelex tag is present when ``use_intelex=True`` is passed."""
    features, _ = iris_data

    init_kwargs = {
        "model": iris_pkl_file,
        "requirements": [],
        "dst": destination_path,
        "task": "tabular-classification",
        "data": features,
        "use_intelex": True,
    }
    hub_utils.init(**init_kwargs)

    tags = metadata_from_config(destination_path).tags
    assert "scikit-learn-intelex" in tags


@pytest.mark.xfail(reason="dynamic adjustment when model changes not implemented yet")
class TestModelDynamicUpdate:
Expand Down
26 changes: 0 additions & 26 deletions skops/hub_utils/_hf_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ def _create_config(
"pickle",
"auto",
] = "auto",
use_intelex: bool = False,
) -> None:
"""Write the configuration into a ``config.json`` file.

Expand Down Expand Up @@ -256,14 +255,6 @@ def _create_config(

- ``"pickle"`` if the extension is one of ``{".pickle", ".pkl", ".joblib"}``
- ``"skops"`` if the extension is ``".skops"``

use_intelex: bool (default=False)
Whether to enable ``scikit-learn-intelex``. This can accelerate some
sklearn models by a large factor with the right hardware. In most cases,
enabling this option should not break any code, even if the model was
not initially trained with scikit-learn intelex and even if the hardware
does not support it. For more info, see
https://intel.github.io/scikit-learn-intelex/.
"""

# so that we don't have to explicitly add keys and they're added as a
Expand All @@ -289,7 +280,6 @@ def recursively_default_dict() -> MutableMapping:
config["sklearn"]["environment"] = requirements
config["sklearn"]["task"] = task
config["sklearn"]["model_format"] = model_format
config["sklearn"]["use_intelex"] = use_intelex

if "tabular" in task:
config["sklearn"]["example_input"] = _get_example_input_from_tabular_data(data)
Expand Down Expand Up @@ -344,7 +334,6 @@ def init(
"pickle",
"auto",
] = "auto",
use_intelex: bool = False,
) -> None:
"""Initialize a scikit-learn based Hugging Face repo.

Expand Down Expand Up @@ -388,14 +377,6 @@ def init(
model_format: str (default="auto")
The format the model was persisted in. Can be ``"auto"``, ``"skops"``
or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension.

use_intelex: bool (default=False)
Whether to enable ``scikit-learn-intelex``. This can accelerate some
sklearn models by a large factor with the right hardware. In most cases,
enabling this option should not break any code, even if the model was
not initially trained with scikit-learn intelex and even if the hardware
does not support it. For more info, see
https://intel.github.io/scikit-learn-intelex/.
"""
dst = Path(dst)
if dst.exists() and bool(next(dst.iterdir(), None)):
Expand All @@ -410,12 +391,6 @@ def init(

dst.mkdir(parents=True, exist_ok=True)

# add intelex requirement, if it's used and not already in requirements
if use_intelex and not any(
r.startswith("scikit-learn-intelex") for r in requirements
):
requirements.append("scikit-learn-intelex")

try:
shutil.copy2(src=model, dst=dst)

Expand All @@ -427,7 +402,6 @@ def init(
task=task,
data=data,
model_format=model_format,
use_intelex=use_intelex,
)
except Exception:
shutil.rmtree(dst)
Expand Down
48 changes: 0 additions & 48 deletions skops/hub_utils/tests/test_hf_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,51 +652,3 @@ def test_adding_existing_file_raises(self, init_path, some_file_0):
)
with pytest.raises(FileExistsError, match=msg):
add_files(some_file_0, dst=init_path)


class TestUseIntelex:
    """Tests related to the usage of scikit-learn intelex, see #251."""

    def make_config(self, model, requirements, **kwargs):
        """Initialize a repo in a fresh directory and return its config.

        Parameters
        ----------
        model
            Forwarded to ``init`` as ``model``.

        requirements
            Forwarded to ``init`` as ``requirements``.

        **kwargs
            Extra keyword arguments forwarded to ``init`` (e.g.
            ``use_intelex``).

        Returns
        -------
        config
            Whatever ``get_config`` returns for the initialized directory.
        """
        # Only the unique path is wanted: the directory is removed again so
        # that ``init`` creates the destination itself.
        dir_path = tempfile.mkdtemp()
        shutil.rmtree(dir_path)

        try:
            init(
                model=model,
                dst=dir_path,
                task="tabular-classification",
                data=iris.data,
                requirements=requirements,
                **kwargs,
            )
            # NOTE(review): assumes ``get_config`` returns a fully loaded
            # object that no longer needs the directory -- confirm.
            return get_config(dir_path)
        finally:
            # Do not leak one temporary repo per call; the directory may
            # already be gone if ``init`` failed and cleaned up after itself.
            shutil.rmtree(dir_path, ignore_errors=True)

    def test_no_intelex(self, classifier):
        """Without ``use_intelex``, the flag is False and no requirement is added."""
        # by default, intelex is not being used
        config = self.make_config(model=classifier, requirements=["foobar"])
        environment = config["sklearn"]["environment"]

        assert config["sklearn"]["use_intelex"] is False
        assert not any(r.startswith("scikit-learn-intelex") for r in environment)

    def test_use_intelex_but_not_explicitly_in_requirements(self, classifier):
        """With ``use_intelex=True``, a missing intelex requirement is added."""
        # when using intelex, if it's not explicitly in the environment, add it
        # automatically
        config = self.make_config(
            model=classifier, requirements=["foobar"], use_intelex=True
        )
        environment = config["sklearn"]["environment"]

        assert config["sklearn"]["use_intelex"] is True
        assert any(r == "scikit-learn-intelex" for r in environment)

    def test_use_intelex_explicitly_in_requirements(self, classifier):
        """An explicitly pinned intelex requirement is kept and not duplicated."""
        # when users specify intelex explicitly, it's not added automatically to
        # the requirements
        reqs = ["foobar", "scikit-learn-intelex==2023.0.0"]
        config = self.make_config(model=classifier, requirements=reqs, use_intelex=True)
        environment = config["sklearn"]["environment"]

        assert config["sklearn"]["use_intelex"] is True
        assert not any(r == "scikit-learn-intelex" for r in environment)
        assert any(r == "scikit-learn-intelex==2023.0.0" for r in environment)