diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f55139d32..6cf74a9ab 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 @@ -75,10 +75,17 @@ jobs: run: | pip install poetry pip install -e '.[dev,setup]' pytest-xdist pip + if: matrix.python-version != '3.10' # uv venv # source .venv/bin/activate # uv pip install -e '.[dev,setup]' pytest-xdist pip + - name: Install dependencies + run: | + pip install poetry + pip install -e '.[dev-no-ml,setup]' pytest-xdist pip + if: matrix.python-version == '3.10' + - name: Test with Pytest on Python ${{ matrix.python-version }} env: UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }} diff --git a/changelog.md b/changelog.md index 99ee2c442..ef4ee37ff 100644 --- a/changelog.md +++ b/changelog.md @@ -23,6 +23,7 @@ - Bubble BaseComponent instantiation errors correctly - Improved support for multi-gpu gradient accumulation (only sync the gradients at the end of the accumulation), now controled by the optiona `sub_batch_size` argument of `TrainingData`. - Support again edsnlp without pytorch installed +- We now test that edsnlp works without pytorch installed ## v0.14.0 (2024-11-14) diff --git a/pyproject.toml b/pyproject.toml index a39592e3f..d81c24fba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "pydantic-core<2.0.0; python_version<'3.8'", ] [project.optional-dependencies] -dev = [ +dev-no-ml = [ "pre-commit>=2.0.0; python_version<'3.8'", "pre-commit>=2.21.0; python_version>='3.8'", "pytest>=7.1.0", @@ -48,35 +48,12 @@ dev = [ "pyspark", "polars", - # Machine Learning - "rich-logger>=0.3.1", - "torch>=1.13.0", - "foldedtensor>=0.3.2", - "safetensors>=0.3.0", - "transformers>=4.0.0,<5.0.0", - "accelerate>=0.20.3,<1.0.0", "mlconjug3<3.9.0", "scikit-learn>=1.0.0", - # Docs (same as docs group) - "mike~=1.1.2", - "mkdocs-charts-plugin==0.0.8", - "mkdocs-img2fig-plugin==0.9.3", - "mkdocs-material~=9.2.0", - "mkdocs-section-index==0.3.4", - "mkdocs~=1.5.2", - "mkdocstrings~=0.20", - "mkdocstrings-python~=1.1", - "mkdocs-minify-plugin", - "mkdocs-redirects>=1.2.1;python_version>='3.8'", - "pybtex~=0.24.0", - "pathspec>=0.11.1", # required by vendored mkdocs-autorefs PR - "astunparse", - "griffe<0.39", - "jedi", - "html5lib", + "edsnlp[docs-no-ml]", ] -docs = [ +docs-no-ml = [ "mike~=1.1.2", "mkdocs-charts-plugin==0.0.8", "mkdocs-img2fig-plugin==0.9.3", @@ -93,15 +70,6 @@ docs = [ "griffe<0.39", "jedi", "html5lib", - - "torch>=1.13.0", - "foldedtensor>=0.3.2", - "transformers>=4.0.0,<5.0.0", - "safetensors>=0.3.0", - "rich-logger>=0.3.1", -] -setup = [ - "typer" ] ml = [ "rich-logger>=0.3.1", @@ -111,6 +79,17 @@ ml = [ "transformers>=4.0.0,<5.0.0", "accelerate>=0.20.3,<1.0.0", ] +docs = [ + "edsnlp[docs-no-ml]", + "edsnlp[ml]", +] +dev = [ + "edsnlp[dev-no-ml]", + "edsnlp[ml]", +] +setup = [ + "typer" +] [project.urls] "Source Code" = "https://github.com/aphp/edsnlp" diff --git a/tests/conftest.py b/tests/conftest.py index 076f16ec1..54578d415 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,12 @@ except AttributeError: pass logging.basicConfig(level=logging.INFO) +try: + import torch.nn +except ImportError: + torch = None + +pytest.importorskip("rich") @fixture(scope="session", params=["eds", "fr"]) @@ -75,11 +81,15 @@ def make_ml_pipeline(): @fixture() def ml_nlp(): + if torch is None: + pytest.skip("torch not installed", allow_module_level=False) return make_ml_pipeline() @fixture(scope="session") def frozen_ml_nlp(): + if torch is None: + pytest.skip("torch not installed", allow_module_level=False) return make_ml_pipeline() diff --git a/tests/data/test_stream.py b/tests/data/test_stream.py index b1026d486..1f5c62b50 100644 --- a/tests/data/test_stream.py +++ b/tests/data/test_stream.py @@ -3,6 +3,11 @@ import edsnlp from edsnlp.utils.collections import ld_to_dl +try: + import torch.nn +except ImportError: + torch = None + def test_map_batches(): items = [1, 2, 3, 4, 5] @@ -30,6 +35,7 @@ def test_flat_iterable(num_cpu_workers): @pytest.mark.parametrize("num_gpu_workers", [0, 1, 2]) +@pytest.mark.skipif(torch is None, reason="torch not installed") def test_map_gpu(num_gpu_workers): import torch diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 596266950..8c12941c0 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -13,4 +13,8 @@ def test_import_all(): for name in dir(edsnlp.pipes): if not name.startswith("_") and "endlines" not in name: - getattr(edsnlp.pipes, name) + try: + getattr(edsnlp.pipes, name) + except (ImportError, AttributeError) as e: + if "torch" in str(e): + pass diff --git a/tests/pipelines/trainable/conftest.py b/tests/pipelines/trainable/conftest.py new file mode 100644 index 000000000..705ca760b --- /dev/null +++ b/tests/pipelines/trainable/conftest.py @@ -0,0 +1,10 @@ +import pytest + +try: + import torch.nn +except ImportError: + torch = None + +if torch is None: + pytest.skip("torch not installed", allow_module_level=True) +pytest.importorskip("rich") diff --git a/tests/processing/test_backends.py b/tests/processing/test_backends.py index c5724ce8d..6b6e4793f 100644 --- a/tests/processing/test_backends.py +++ b/tests/processing/test_backends.py @@ -14,6 +14,12 @@ from edsnlp.data.converters import get_current_tokenizer from edsnlp.processing.multiprocessing import get_dispatch_schedule +try: + import torch.nn +except ImportError: + torch = None + + docs = [ { "note_id": 1234, @@ -259,10 +265,8 @@ def test_multiprocessing_rb_error(ml_nlp): list(docs) -try: - import torch - - from edsnlp.core.torch_component import BatchInput, BatchOutput, TorchComponent +if torch is not None: + from edsnlp.core.torch_component import TorchComponent class DeepLearningError(TorchComponent): def __init__(self, *args, **kwargs): @@ -282,10 +286,8 @@ def forward(self, batch): raise RuntimeError("Deep learning error") return {} -except (ImportError, AttributeError): - pass - +@pytest.mark.skipif(torch is None, reason="torch not installed") def test_multiprocessing_ml_error(ml_nlp): text1 = "Ceci est un exemple" text2 = "Ceci est un autre exemple" @@ -376,54 +378,60 @@ def process_batch(x): assert items == [*range(0, 10), *range(10, 100, 2)] -@validate_arguments -class InnerComponent(TorchComponent): - def __init__(self, nlp=None, *args, **kwargs): - super().__init__() - self.called_forward = False - - def preprocess(self, doc): - return {"text": doc.text} +@pytest.mark.parametrize( + "backend", + ["simple", "multiprocesing"], +) +@pytest.mark.skipif(torch is None, reason="torch not installed") +def test_backend_cache(backend): + import torch - def collate(self, batch: Dict[str, Any]) -> BatchInput: - return {"sizes": torch.as_tensor([len(x) for x in batch["text"]])} + from edsnlp.core.torch_component import ( + BatchInput, + BatchOutput, + TorchComponent, + _caches, + ) - def forward(self, batch): - assert not self.called_forward - self.called_forward = True - return {"sizes": batch["sizes"] * 2} + @validate_arguments + class InnerComponent(TorchComponent): + def __init__(self, nlp=None, *args, **kwargs): + super().__init__() + self.called_forward = False + def preprocess(self, doc): + return {"text": doc.text} -@validate_arguments -class OuterComponent(TorchComponent): - def __init__(self, inner): - super().__init__() - self.inner = inner + def collate(self, batch: Dict[str, Any]) -> BatchInput: + return {"sizes": torch.as_tensor([len(x) for x in batch["text"]])} - def preprocess(self, doc): - return {"inner": self.inner.preprocess(doc)} + def forward(self, batch): + assert not self.called_forward + self.called_forward = True + return {"sizes": batch["sizes"] * 2} - def collate(self, batch: Dict[str, Any]) -> BatchInput: - return {"inner": self.inner.collate(batch["inner"])} + @validate_arguments + class OuterComponent(TorchComponent): + def __init__(self, inner): + super().__init__() + self.inner = inner - def forward(self, batch: BatchInput) -> BatchOutput: - return {"inner": self.inner(batch["inner"])["sizes"].clone()} + def preprocess(self, doc): + return {"inner": self.inner.preprocess(doc)} - def postprocess( - self, - docs: Sequence[Doc], - results: BatchOutput, - inputs: List[Dict[str, Any]], - ) -> Sequence[Doc]: - return docs + def collate(self, batch: Dict[str, Any]) -> BatchInput: + return {"inner": self.inner.collate(batch["inner"])} + def forward(self, batch: BatchInput) -> BatchOutput: + return {"inner": self.inner(batch["inner"])["sizes"].clone()} -@pytest.mark.parametrize( - "backend", - ["simple", "multiprocesing"], -) -def test_backend_cache(backend): - from edsnlp.core.torch_component import _caches + def postprocess( + self, + docs: Sequence[Doc], + results: BatchOutput, + inputs: List[Dict[str, Any]], + ) -> Sequence[Doc]: + return docs nlp = edsnlp.blank("eds") nlp.add_pipe(InnerComponent(), name="inner") diff --git a/tests/test_docs.py b/tests/test_docs.py index e89c251b5..5bc4ef1f9 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -4,6 +4,14 @@ import pytest pytest.importorskip("mkdocs") +try: + import torch.nn +except ImportError: + torch = None + +if torch is None: + pytest.skip("torch not installed", allow_module_level=True) +pytest.importorskip("rich") from extract_docs_code import extract_docs_code # noqa: E402 diff --git a/tests/test_entrypoints.py b/tests/test_entrypoints.py index 084d287ec..e1750e25a 100644 --- a/tests/test_entrypoints.py +++ b/tests/test_entrypoints.py @@ -1,10 +1,19 @@ import catalogue +import pytest try: from importlib.metadata import entry_points except ImportError: from importlib_metadata import entry_points +try: + import torch.nn +except ImportError: + torch = None + +if torch is None: + pytest.skip("torch not installed", allow_module_level=True) + def test_entrypoints(): ep = entry_points() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index d4734498d..cbea7e311 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -15,6 +15,11 @@ from edsnlp.core.registries import CurriedFactory from edsnlp.pipes.base import BaseComponent +try: + import torch.nn +except ImportError: + torch = None + class CustomClass: pass @@ -143,6 +148,7 @@ def test_disk_serialization(tmp_path, ml_nlp): """ +@pytest.mark.skipif(torch is None, reason="torch not installed") def test_validate_config(): @validate_arguments def function(model: Pipeline): @@ -212,6 +218,7 @@ def test_different_names(): ) in str(exc_info.value) +@pytest.mark.skipif(torch is None, reason="torch not installed") def test_load_config(run_in_test_dir): nlp = edsnlp.load("training/qlf_config.yml") assert nlp.pipe_names == [ @@ -246,6 +253,7 @@ def test_load_config(run_in_test_dir): """ +@pytest.mark.skipif(torch is None, reason="torch not installed") def test_config_validation_error(): with pytest.raises(ConfitValidationError) as e: Pipeline.from_config(Config.from_str(fail_config)) @@ -397,6 +405,7 @@ def test_curried_nlp_pipe(): sys.version_info < (3, 8), reason="Can't run on GH CI with Python 3.7", ) +@pytest.mark.skipif(torch is None, reason="torch not installed") def test_huggingface(): nlp = edsnlp.load( "AP-HP/dummy-ner", diff --git a/tests/training/test_optimizer.py b/tests/training/test_optimizer.py index fc029f578..5ee59fd3c 100644 --- a/tests/training/test_optimizer.py +++ b/tests/training/test_optimizer.py @@ -1,5 +1,14 @@ +# ruff:noqa:E402 import pytest -import torch + +try: + import torch.nn +except ImportError: + torch = None + +if torch is None: + pytest.skip("torch not installed", allow_module_level=True) +pytest.importorskip("rich") from edsnlp.training.optimizer import LinearSchedule, ScheduledOptimizer @@ -73,12 +82,12 @@ def test_old_parameter_selection(net, groups): assert all([p in optim.state for p in net.fc1.parameters()]) optim.state = optim.state - fc1_group = optim.param_groups[0] + fc1_group = optim.param_groups[1] assert fc1_group["lr"] == pytest.approx(0.0) assert fc1_group["weight_decay"] == pytest.approx(0.01) assert set(fc1_group["params"]) == {net.fc1.weight, net.fc1.bias} - fc2_group = optim.param_groups[1] + fc2_group = optim.param_groups[0] assert fc2_group["lr"] == pytest.approx(0.0001) assert set(fc2_group["params"]) == {net.fc2.weight} @@ -132,9 +141,9 @@ def test_serialization(net): state_dict = optim.state_dict() optim.step() - assert optim.param_groups[0]["lr"] == pytest.approx(0.0) + assert optim.param_groups[-1]["lr"] == pytest.approx(0.0) optim.load_state_dict(state_dict) - assert optim.param_groups[0]["lr"] == pytest.approx(0.0625) + assert optim.param_groups[-1]["lr"] == pytest.approx(0.0625) optim.reset() diff --git a/tests/training/test_train.py b/tests/training/test_train.py index b7e2ac36c..b5eca9f07 100644 --- a/tests/training/test_train.py +++ b/tests/training/test_train.py @@ -1,9 +1,14 @@ -# ruff: noqa: E402 +# ruff:noqa:E402 import pytest -from edsnlp.metrics.dep_parsing import DependencyParsingMetric +try: + import torch.nn +except ImportError: + torch = None +if torch is None: + pytest.skip("torch not installed", allow_module_level=True) pytest.importorskip("rich") import shutil @@ -22,6 +27,7 @@ from edsnlp.core.registries import registry from edsnlp.data.converters import AttributesMappingArg, get_current_tokenizer +from edsnlp.metrics.dep_parsing import DependencyParsingMetric from edsnlp.training.optimizer import LinearSchedule, ScheduledOptimizer from edsnlp.training.trainer import GenericScorer, train from edsnlp.utils.span_getters import SpanSetterArg, set_spans