test: support pytorch-less tests

aphp · Dec 12, 2024 · 352247c · 352247c
1 parent 3da7bcf
commit 352247c
Show file tree

Hide file tree

Showing 13 changed files with 155 additions and 89 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -36,7 +36,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        python-version: ["3.7", "3.8", "3.9"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
     steps:
       - uses: actions/checkout@v2
 
@@ -75,10 +75,17 @@ jobs:
         run: |
           pip install poetry
           pip install -e '.[dev,setup]' pytest-xdist pip
+        if: matrix.python-version != '3.10'
 #          uv venv
 #          source .venv/bin/activate
 #          uv pip install -e '.[dev,setup]' pytest-xdist pip
 
+      - name: Install dependencies
+        run: |
+          pip install poetry
+          pip install -e '.[dev-no-ml,setup]' pytest-xdist pip
+        if: matrix.python-version == '3.10'
+
       - name: Test with Pytest on Python ${{ matrix.python-version }}
         env:
           UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}

diff --git a/changelog.md b/changelog.md
@@ -23,6 +23,7 @@
 - Bubble BaseComponent instantiation errors correctly
 - Improved support for multi-gpu gradient accumulation (only sync the gradients at the end of the accumulation), now controlled by the optional `sub_batch_size` argument of `TrainingData`.
 - Support again edsnlp without pytorch installed
+- We now test that edsnlp works without pytorch installed
 
 ## v0.14.0 (2024-11-14)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -36,7 +36,7 @@ dependencies = [
     "pydantic-core<2.0.0; python_version<'3.8'",
 ]
 [project.optional-dependencies]
-dev = [
+dev-no-ml = [
     "pre-commit>=2.0.0; python_version<'3.8'",
     "pre-commit>=2.21.0; python_version>='3.8'",
     "pytest>=7.1.0",
@@ -48,35 +48,12 @@ dev = [
     "pyspark",
     "polars",
 
-    # Machine Learning
-    "rich-logger>=0.3.1",
-    "torch>=1.13.0",
-    "foldedtensor>=0.3.2",
-    "safetensors>=0.3.0",
-    "transformers>=4.0.0,<5.0.0",
-    "accelerate>=0.20.3,<1.0.0",
     "mlconjug3<3.9.0",
     "scikit-learn>=1.0.0",
 
-    # Docs (same as docs group)
-    "mike~=1.1.2",
-    "mkdocs-charts-plugin==0.0.8",
-    "mkdocs-img2fig-plugin==0.9.3",
-    "mkdocs-material~=9.2.0",
-    "mkdocs-section-index==0.3.4",
-    "mkdocs~=1.5.2",
-    "mkdocstrings~=0.20",
-    "mkdocstrings-python~=1.1",
-    "mkdocs-minify-plugin",
-    "mkdocs-redirects>=1.2.1;python_version>='3.8'",
-    "pybtex~=0.24.0",
-    "pathspec>=0.11.1",  # required by vendored mkdocs-autorefs PR
-    "astunparse",
-    "griffe<0.39",
-    "jedi",
-    "html5lib",
+    "edsnlp[docs-no-ml]",
 ]
-docs = [
+docs-no-ml = [
     "mike~=1.1.2",
     "mkdocs-charts-plugin==0.0.8",
     "mkdocs-img2fig-plugin==0.9.3",
@@ -93,15 +70,6 @@ docs = [
     "griffe<0.39",
     "jedi",
     "html5lib",
-
-    "torch>=1.13.0",
-    "foldedtensor>=0.3.2",
-    "transformers>=4.0.0,<5.0.0",
-    "safetensors>=0.3.0",
-    "rich-logger>=0.3.1",
-]
-setup = [
-    "typer"
 ]
 ml = [
     "rich-logger>=0.3.1",
@@ -111,6 +79,17 @@ ml = [
     "transformers>=4.0.0,<5.0.0",
     "accelerate>=0.20.3,<1.0.0",
 ]
+docs = [
+    "edsnlp[docs-no-ml]",
+    "edsnlp[ml]",
+]
+dev = [
+    "edsnlp[dev-no-ml]",
+    "edsnlp[ml]",
+]
+setup = [
+    "typer"
+]
 
 [project.urls]
 "Source Code" = "https://github.com/aphp/edsnlp"

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -19,6 +19,12 @@
 except AttributeError:
     pass
 logging.basicConfig(level=logging.INFO)
+try:
+    import torch.nn
+except ImportError:
+    torch = None
+
+pytest.importorskip("rich")
 
 
 @fixture(scope="session", params=["eds", "fr"])
@@ -75,11 +81,15 @@ def make_ml_pipeline():
 
 @fixture()
 def ml_nlp():
+    if torch is None:
+        pytest.skip("torch not installed", allow_module_level=False)
     return make_ml_pipeline()
 
 
 @fixture(scope="session")
 def frozen_ml_nlp():
+    if torch is None:
+        pytest.skip("torch not installed", allow_module_level=False)
     return make_ml_pipeline()
 
 

diff --git a/tests/data/test_stream.py b/tests/data/test_stream.py
@@ -3,6 +3,11 @@
 import edsnlp
 from edsnlp.utils.collections import ld_to_dl
 
+try:
+    import torch.nn
+except ImportError:
+    torch = None
+
 
 def test_map_batches():
     items = [1, 2, 3, 4, 5]
@@ -30,6 +35,7 @@ def test_flat_iterable(num_cpu_workers):
 
 
 @pytest.mark.parametrize("num_gpu_workers", [0, 1, 2])
+@pytest.mark.skipif(torch is None, reason="torch not installed")
 def test_map_gpu(num_gpu_workers):
     import torch
 

diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py
@@ -13,4 +13,8 @@ def test_import_all():
 
     for name in dir(edsnlp.pipes):
         if not name.startswith("_") and "endlines" not in name:
-            getattr(edsnlp.pipes, name)
+            try:
+                getattr(edsnlp.pipes, name)
+            except (ImportError, AttributeError) as e:
+                if "torch" in str(e):
+                    pass
diff --git a/tests/pipelines/trainable/conftest.py b/tests/pipelines/trainable/conftest.py
@@ -0,0 +1,10 @@
+import pytest
+
+try:
+    import torch.nn
+except ImportError:
+    torch = None
+
+if torch is None:
+    pytest.skip("torch not installed", allow_module_level=True)
+pytest.importorskip("rich")
diff --git a/tests/processing/test_backends.py b/tests/processing/test_backends.py
@@ -14,6 +14,12 @@
 from edsnlp.data.converters import get_current_tokenizer
 from edsnlp.processing.multiprocessing import get_dispatch_schedule
 
+try:
+    import torch.nn
+except ImportError:
+    torch = None
+
+
 docs = [
     {
         "note_id": 1234,
@@ -259,10 +265,8 @@ def test_multiprocessing_rb_error(ml_nlp):
         list(docs)
 
 
-try:
-    import torch
-
-    from edsnlp.core.torch_component import BatchInput, BatchOutput, TorchComponent
+if torch is not None:
+    from edsnlp.core.torch_component import TorchComponent
 
     class DeepLearningError(TorchComponent):
         def __init__(self, *args, **kwargs):
@@ -282,10 +286,8 @@ def forward(self, batch):
                 raise RuntimeError("Deep learning error")
             return {}
 
-except (ImportError, AttributeError):
-    pass
-
 
+@pytest.mark.skipif(torch is None, reason="torch not installed")
 def test_multiprocessing_ml_error(ml_nlp):
     text1 = "Ceci est un exemple"
     text2 = "Ceci est un autre exemple"
@@ -376,54 +378,60 @@ def process_batch(x):
     assert items == [*range(0, 10), *range(10, 100, 2)]
 
 
-@validate_arguments
-class InnerComponent(TorchComponent):
-    def __init__(self, nlp=None, *args, **kwargs):
-        super().__init__()
-        self.called_forward = False
-
-    def preprocess(self, doc):
-        return {"text": doc.text}
+@pytest.mark.parametrize(
+    "backend",
+    ["simple", "multiprocesing"],
+)
+@pytest.mark.skipif(torch is None, reason="torch not installed")
+def test_backend_cache(backend):
+    import torch
 
-    def collate(self, batch: Dict[str, Any]) -> BatchInput:
-        return {"sizes": torch.as_tensor([len(x) for x in batch["text"]])}
+    from edsnlp.core.torch_component import (
+        BatchInput,
+        BatchOutput,
+        TorchComponent,
+        _caches,
+    )
 
-    def forward(self, batch):
-        assert not self.called_forward
-        self.called_forward = True
-        return {"sizes": batch["sizes"] * 2}
+    @validate_arguments
+    class InnerComponent(TorchComponent):
+        def __init__(self, nlp=None, *args, **kwargs):
+            super().__init__()
+            self.called_forward = False
 
+        def preprocess(self, doc):
+            return {"text": doc.text}
 
-@validate_arguments
-class OuterComponent(TorchComponent):
-    def __init__(self, inner):
-        super().__init__()
-        self.inner = inner
+        def collate(self, batch: Dict[str, Any]) -> BatchInput:
+            return {"sizes": torch.as_tensor([len(x) for x in batch["text"]])}
 
-    def preprocess(self, doc):
-        return {"inner": self.inner.preprocess(doc)}
+        def forward(self, batch):
+            assert not self.called_forward
+            self.called_forward = True
+            return {"sizes": batch["sizes"] * 2}
 
-    def collate(self, batch: Dict[str, Any]) -> BatchInput:
-        return {"inner": self.inner.collate(batch["inner"])}
+    @validate_arguments
+    class OuterComponent(TorchComponent):
+        def __init__(self, inner):
+            super().__init__()
+            self.inner = inner
 
-    def forward(self, batch: BatchInput) -> BatchOutput:
-        return {"inner": self.inner(batch["inner"])["sizes"].clone()}
+        def preprocess(self, doc):
+            return {"inner": self.inner.preprocess(doc)}
 
-    def postprocess(
-        self,
-        docs: Sequence[Doc],
-        results: BatchOutput,
-        inputs: List[Dict[str, Any]],
-    ) -> Sequence[Doc]:
-        return docs
+        def collate(self, batch: Dict[str, Any]) -> BatchInput:
+            return {"inner": self.inner.collate(batch["inner"])}
 
+        def forward(self, batch: BatchInput) -> BatchOutput:
+            return {"inner": self.inner(batch["inner"])["sizes"].clone()}
 
-@pytest.mark.parametrize(
-    "backend",
-    ["simple", "multiprocesing"],
-)
-def test_backend_cache(backend):
-    from edsnlp.core.torch_component import _caches
+        def postprocess(
+            self,
+            docs: Sequence[Doc],
+            results: BatchOutput,
+            inputs: List[Dict[str, Any]],
+        ) -> Sequence[Doc]:
+            return docs
 
     nlp = edsnlp.blank("eds")
     nlp.add_pipe(InnerComponent(), name="inner")

diff --git a/tests/test_docs.py b/tests/test_docs.py
@@ -4,6 +4,14 @@
 import pytest
 
 pytest.importorskip("mkdocs")
+try:
+    import torch.nn
+except ImportError:
+    torch = None
+
+if torch is None:
+    pytest.skip("torch not installed", allow_module_level=True)
+pytest.importorskip("rich")
 
 from extract_docs_code import extract_docs_code  # noqa: E402
 

diff --git a/tests/test_entrypoints.py b/tests/test_entrypoints.py
@@ -1,10 +1,19 @@
 import catalogue
+import pytest
 
 try:
     from importlib.metadata import entry_points
 except ImportError:
     from importlib_metadata import entry_points
 
+try:
+    import torch.nn
+except ImportError:
+    torch = None
+
+if torch is None:
+    pytest.skip("torch not installed", allow_module_level=True)
+
 
 def test_entrypoints():
     ep = entry_points()