Skip to content

Commit

Permalink
test: support pytorch-less tests
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Dec 12, 2024
1 parent 3da7bcf commit 352247c
Show file tree
Hide file tree
Showing 13 changed files with 155 additions and 89 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: ["3.7", "3.8", "3.9"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v2

Expand Down Expand Up @@ -75,10 +75,17 @@ jobs:
run: |
pip install poetry
pip install -e '.[dev,setup]' pytest-xdist pip
if: matrix.python-version != '3.10'
# uv venv
# source .venv/bin/activate
# uv pip install -e '.[dev,setup]' pytest-xdist pip

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev-no-ml,setup]' pytest-xdist pip
if: matrix.python-version == '3.10'

- name: Test with Pytest on Python ${{ matrix.python-version }}
env:
UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
Expand Down
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- Bubble BaseComponent instantiation errors correctly
- Improved support for multi-gpu gradient accumulation (only sync the gradients at the end of the accumulation), now controlled by the optional `sub_batch_size` argument of `TrainingData`.
- Support again edsnlp without pytorch installed
- We now test that edsnlp works without pytorch installed

## v0.14.0 (2024-11-14)

Expand Down
49 changes: 14 additions & 35 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies = [
"pydantic-core<2.0.0; python_version<'3.8'",
]
[project.optional-dependencies]
dev = [
dev-no-ml = [
"pre-commit>=2.0.0; python_version<'3.8'",
"pre-commit>=2.21.0; python_version>='3.8'",
"pytest>=7.1.0",
Expand All @@ -48,35 +48,12 @@ dev = [
"pyspark",
"polars",

# Machine Learning
"rich-logger>=0.3.1",
"torch>=1.13.0",
"foldedtensor>=0.3.2",
"safetensors>=0.3.0",
"transformers>=4.0.0,<5.0.0",
"accelerate>=0.20.3,<1.0.0",
"mlconjug3<3.9.0",
"scikit-learn>=1.0.0",

# Docs (same as docs group)
"mike~=1.1.2",
"mkdocs-charts-plugin==0.0.8",
"mkdocs-img2fig-plugin==0.9.3",
"mkdocs-material~=9.2.0",
"mkdocs-section-index==0.3.4",
"mkdocs~=1.5.2",
"mkdocstrings~=0.20",
"mkdocstrings-python~=1.1",
"mkdocs-minify-plugin",
"mkdocs-redirects>=1.2.1;python_version>='3.8'",
"pybtex~=0.24.0",
"pathspec>=0.11.1", # required by vendored mkdocs-autorefs PR
"astunparse",
"griffe<0.39",
"jedi",
"html5lib",
"edsnlp[docs-no-ml]",
]
docs = [
docs-no-ml = [
"mike~=1.1.2",
"mkdocs-charts-plugin==0.0.8",
"mkdocs-img2fig-plugin==0.9.3",
Expand All @@ -93,15 +70,6 @@ docs = [
"griffe<0.39",
"jedi",
"html5lib",

"torch>=1.13.0",
"foldedtensor>=0.3.2",
"transformers>=4.0.0,<5.0.0",
"safetensors>=0.3.0",
"rich-logger>=0.3.1",
]
setup = [
"typer"
]
ml = [
"rich-logger>=0.3.1",
Expand All @@ -111,6 +79,17 @@ ml = [
"transformers>=4.0.0,<5.0.0",
"accelerate>=0.20.3,<1.0.0",
]
docs = [
"edsnlp[docs-no-ml]",
"edsnlp[ml]",
]
dev = [
"edsnlp[dev-no-ml]",
"edsnlp[ml]",
]
setup = [
"typer"
]

[project.urls]
"Source Code" = "https://github.com/aphp/edsnlp"
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
except AttributeError:
pass
logging.basicConfig(level=logging.INFO)
try:
import torch.nn
except ImportError:
torch = None

pytest.importorskip("rich")


@fixture(scope="session", params=["eds", "fr"])
Expand Down Expand Up @@ -75,11 +81,15 @@ def make_ml_pipeline():

@fixture()
def ml_nlp():
    """Return a pipeline built by ``make_ml_pipeline`` for the requesting test.

    The requesting test is skipped when torch could not be imported.
    """
    if torch is not None:
        return make_ml_pipeline()
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("torch not installed", allow_module_level=False)


@fixture(scope="session")
def frozen_ml_nlp():
    """Return a session-scoped pipeline built by ``make_ml_pipeline``.

    The requesting test is skipped when torch could not be imported.
    """
    if torch is not None:
        return make_ml_pipeline()
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("torch not installed", allow_module_level=False)


Expand Down
6 changes: 6 additions & 0 deletions tests/data/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
import edsnlp
from edsnlp.utils.collections import ld_to_dl

try:
import torch.nn
except ImportError:
torch = None


def test_map_batches():
items = [1, 2, 3, 4, 5]
Expand Down Expand Up @@ -30,6 +35,7 @@ def test_flat_iterable(num_cpu_workers):


@pytest.mark.parametrize("num_gpu_workers", [0, 1, 2])
@pytest.mark.skipif(torch is None, reason="torch not installed")
def test_map_gpu(num_gpu_workers):
import torch

Expand Down
6 changes: 5 additions & 1 deletion tests/pipelines/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ def test_import_all():

for name in dir(edsnlp.pipes):
if not name.startswith("_") and "endlines" not in name:
getattr(edsnlp.pipes, name)
try:
    getattr(edsnlp.pipes, name)
except (ImportError, AttributeError) as e:
    # Tolerate only failures caused by the optional torch dependency
    # being absent. The previous handler did `pass` for torch errors
    # and fell through silently for everything else, so no import
    # failure could ever fail this test.
    # NOTE(review): if another optional ML dependency can be reported
    # before torch in a torch-less environment, extend this check.
    if "torch" not in str(e):
        raise
10 changes: 10 additions & 0 deletions tests/pipelines/trainable/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Skip this test directory when torch (or rich) is unavailable."""
import pytest

try:
    import torch.nn  # noqa: F401
except ImportError:
    # Without torch there is nothing to run here: skip at module level.
    pytest.skip("torch not installed", allow_module_level=True)

pytest.importorskip("rich")
98 changes: 53 additions & 45 deletions tests/processing/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
from edsnlp.data.converters import get_current_tokenizer
from edsnlp.processing.multiprocessing import get_dispatch_schedule

try:
import torch.nn
except ImportError:
torch = None


docs = [
{
"note_id": 1234,
Expand Down Expand Up @@ -259,10 +265,8 @@ def test_multiprocessing_rb_error(ml_nlp):
list(docs)


try:
import torch

from edsnlp.core.torch_component import BatchInput, BatchOutput, TorchComponent
if torch is not None:
from edsnlp.core.torch_component import TorchComponent

class DeepLearningError(TorchComponent):
def __init__(self, *args, **kwargs):
Expand All @@ -282,10 +286,8 @@ def forward(self, batch):
raise RuntimeError("Deep learning error")
return {}

except (ImportError, AttributeError):
pass


@pytest.mark.skipif(torch is None, reason="torch not installed")
def test_multiprocessing_ml_error(ml_nlp):
text1 = "Ceci est un exemple"
text2 = "Ceci est un autre exemple"
Expand Down Expand Up @@ -376,54 +378,60 @@ def process_batch(x):
assert items == [*range(0, 10), *range(10, 100, 2)]


@validate_arguments
class InnerComponent(TorchComponent):
def __init__(self, nlp=None, *args, **kwargs):
super().__init__()
self.called_forward = False

def preprocess(self, doc):
return {"text": doc.text}
@pytest.mark.parametrize(
"backend",
["simple", "multiprocesing"],
)
@pytest.mark.skipif(torch is None, reason="torch not installed")
def test_backend_cache(backend):
import torch

def collate(self, batch: Dict[str, Any]) -> BatchInput:
return {"sizes": torch.as_tensor([len(x) for x in batch["text"]])}
from edsnlp.core.torch_component import (
BatchInput,
BatchOutput,
TorchComponent,
_caches,
)

def forward(self, batch):
assert not self.called_forward
self.called_forward = True
return {"sizes": batch["sizes"] * 2}
@validate_arguments
class InnerComponent(TorchComponent):
def __init__(self, nlp=None, *args, **kwargs):
super().__init__()
self.called_forward = False

def preprocess(self, doc):
return {"text": doc.text}

@validate_arguments
class OuterComponent(TorchComponent):
def __init__(self, inner):
super().__init__()
self.inner = inner
def collate(self, batch: Dict[str, Any]) -> BatchInput:
return {"sizes": torch.as_tensor([len(x) for x in batch["text"]])}

def preprocess(self, doc):
return {"inner": self.inner.preprocess(doc)}
def forward(self, batch):
assert not self.called_forward
self.called_forward = True
return {"sizes": batch["sizes"] * 2}

def collate(self, batch: Dict[str, Any]) -> BatchInput:
return {"inner": self.inner.collate(batch["inner"])}
@validate_arguments
class OuterComponent(TorchComponent):
def __init__(self, inner):
super().__init__()
self.inner = inner

def forward(self, batch: BatchInput) -> BatchOutput:
return {"inner": self.inner(batch["inner"])["sizes"].clone()}
def preprocess(self, doc):
return {"inner": self.inner.preprocess(doc)}

def postprocess(
self,
docs: Sequence[Doc],
results: BatchOutput,
inputs: List[Dict[str, Any]],
) -> Sequence[Doc]:
return docs
def collate(self, batch: Dict[str, Any]) -> BatchInput:
return {"inner": self.inner.collate(batch["inner"])}

def forward(self, batch: BatchInput) -> BatchOutput:
return {"inner": self.inner(batch["inner"])["sizes"].clone()}

@pytest.mark.parametrize(
"backend",
["simple", "multiprocesing"],
)
def test_backend_cache(backend):
from edsnlp.core.torch_component import _caches
def postprocess(
self,
docs: Sequence[Doc],
results: BatchOutput,
inputs: List[Dict[str, Any]],
) -> Sequence[Doc]:
return docs

nlp = edsnlp.blank("eds")
nlp.add_pipe(InnerComponent(), name="inner")
Expand Down
8 changes: 8 additions & 0 deletions tests/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
import pytest

pytest.importorskip("mkdocs")
try:
import torch.nn
except ImportError:
torch = None

if torch is None:
pytest.skip("torch not installed", allow_module_level=True)
pytest.importorskip("rich")

from extract_docs_code import extract_docs_code # noqa: E402

Expand Down
9 changes: 9 additions & 0 deletions tests/test_entrypoints.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
import catalogue
import pytest

try:
from importlib.metadata import entry_points
except ImportError:
from importlib_metadata import entry_points

try:
import torch.nn
except ImportError:
torch = None

if torch is None:
pytest.skip("torch not installed", allow_module_level=True)


def test_entrypoints():
ep = entry_points()
Expand Down
Loading

0 comments on commit 352247c

Please sign in to comment.