Skip to content

Commit

Permalink
chore: description.md is now optional (#417)
Browse files Browse the repository at this point in the history
Signed-off-by: ThibaultFy <thibault.fouqueray@gmail.com>
  • Loading branch information
ThibaultFy authored Jun 19, 2024
1 parent d9961cc commit dd01b9b
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 2 deletions.
1 change: 1 addition & 0 deletions changes/417.changed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
`description.md` is now `Optional` in `DatasetSpec`. If no description file is given, `Substra` will generate a template indicating how to add a custom description file during dataset registration.
4 changes: 3 additions & 1 deletion references/sdk_schemas.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ the 'paths' field.
Specification for creating a dataset

note : metadata field does not accept strings containing '__' as dict key

note : If no description markdown file is given, a template `generated_description.md` is created in the data_opener folder.
```text
- name: <class 'str'>
- data_opener: <class 'pathlib.Path'>
- description: <class 'pathlib.Path'>
- description: typing.Optional[pathlib.Path]
- permissions: <class 'substra.sdk.schemas.Permissions'>
- metadata: typing.Optional[typing.Dict[str, str]]
- logs_permission: <class 'substra.sdk.schemas.Permissions'>
Expand Down
22 changes: 21 additions & 1 deletion substra/sdk/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
"summary_task": "task",
}

# Markdown template written to `generated_description.md` (next to the data
# opener) when a `DatasetSpec` is built without a description file.
GENERATED_DESCRIPTION_CONTENT = """
# No description given
To add a dataset description, create a markdown file and pass it to your
`substra.sdk.schemas.DatasetSpec` on your dataset opener registration.
"""


class BackendType(str, enum.Enum):
REMOTE = "remote"
Expand Down Expand Up @@ -278,17 +285,30 @@ class DatasetSpec(_Spec):
"""Specification for creating a dataset
note : metadata field does not accept strings containing '__' as dict key
note : If no description markdown file is given, a template `generated_description.md` is created in the data_opener folder.
"""

name: str
data_opener: pathlib.Path # Path to the data opener
description: pathlib.Path # Path to the description file
description: Optional[pathlib.Path] = None # Path to the description file
permissions: Permissions
metadata: Optional[Dict[str, str]] = None
logs_permission: Permissions

type_: typing.ClassVar[Type] = Type.Dataset

@pydantic.model_validator(mode="before")
@classmethod
def _check_description(cls, values):
    """Fall back to a generated description file when none is provided.

    Runs before field validation. When ``description`` is absent or
    explicitly ``None``, a ``generated_description.md`` file containing
    ``GENERATED_DESCRIPTION_CONTENT`` is written next to the data opener and
    its path is used as the description.

    Args:
        values: Raw input handed to the model constructor (normally a dict).

    Returns:
        The input unchanged when no fallback is needed or possible, otherwise
        a shallow copy of it with ``description`` pointing at the generated
        file.
    """
    # A "before" validator receives the raw input, which is not guaranteed to
    # be a dict; leave anything else to pydantic's regular validation.
    if not isinstance(values, dict):
        return values

    # Treat an explicit ``description=None`` the same as an omitted one.
    if values.get("description") is not None:
        return values

    # Without a data opener there is no folder to write into; return early so
    # pydantic reports the missing required field instead of a raw KeyError.
    opener = values.get("data_opener")
    if opener is None:
        return values

    description_path = pathlib.Path(opener).parent / "generated_description.md"
    description_path.write_text(GENERATED_DESCRIPTION_CONTENT, encoding="utf-8")

    # Return a copy so the caller's input mapping is not mutated.
    return {**values, "description": description_path}

class Meta:
file_attributes = (
"data_opener",
Expand Down
17 changes: 17 additions & 0 deletions tests/sdk/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pytest

from substra.sdk.schemas import DataSampleSpec
from substra.sdk.schemas import DatasetSpec
from substra.sdk.schemas import Permissions


@pytest.mark.parametrize("path", [pathlib.Path() / "data", "./data", pathlib.Path().cwd() / "data"])
Expand Down Expand Up @@ -38,3 +40,18 @@ def test_datasample_spec_paths_set_to_none():
def test_datasample_spec_path_set_to_none():
with pytest.raises(ValueError):
DataSampleSpec(path=None, data_manager_keys=[str(uuid.uuid4())])


def test_dataset_spec_no_description(tmpdir):
    """A DatasetSpec built without a description generates one next to the opener."""
    opener_path = tmpdir / "fake_opener.py"
    permissions = Permissions(public=True, authorized_ids=[])

    DatasetSpec(
        name="Fake Dataset",
        data_opener=str(opener_path),
        permissions=permissions,
        logs_permission=permissions,
    )

    generated_description = pathlib.Path(opener_path).parent / "generated_description.md"
    # The predicate must be *called*: a bare ``.exists`` is a bound method
    # object, which is always truthy and would make this assertion vacuous.
    assert generated_description.is_file()

0 comments on commit dd01b9b

Please sign in to comment.