diff --git a/changes/417.changed b/changes/417.changed
new file mode 100644
index 00000000..3c206ef8
--- /dev/null
+++ b/changes/417.changed
@@ -0,0 +1 @@
+`description` is now `Optional` in `DatasetSpec`. If no description file is given, `Substra` will generate a template indicating how to add a custom description file during dataset registration.
\ No newline at end of file
diff --git a/references/sdk_schemas.md b/references/sdk_schemas.md
index 3df140a0..fbeec681 100644
--- a/references/sdk_schemas.md
+++ b/references/sdk_schemas.md
@@ -33,10 +33,12 @@ the 'paths' field.
 Specification for creating a dataset
 
 note : metadata field does not accept strings containing '__' as dict key
+
+note : If no description markdown file is given, a template `generated_description.md` is created in the data_opener folder.
 ```text
 - name: <class 'str'>
 - data_opener: <class 'pathlib.Path'>
-- description: <class 'pathlib.Path'>
+- description: typing.Optional[pathlib.Path]
 - permissions: <class 'substra.sdk.schemas.Permissions'>
 - metadata: typing.Optional[typing.Dict[str, str]]
 - logs_permission: <class 'substra.sdk.schemas.Permissions'>
diff --git a/substra/sdk/schemas.py b/substra/sdk/schemas.py
index 6d5c3a01..fef0a7c8 100644
--- a/substra/sdk/schemas.py
+++ b/substra/sdk/schemas.py
@@ -19,6 +19,13 @@
     "summary_task": "task",
 }
 
+GENERATED_DESCRIPTION_CONTENT = """
+# No description given
+
+To add a dataset description, create a markdown file and pass it to your
+`substra.sdk.schemas.DatasetSpec` on your dataset opener registration.
+"""
+
 
 class BackendType(str, enum.Enum):
     REMOTE = "remote"
@@ -278,17 +285,32 @@ class DatasetSpec(_Spec):
     """Specification for creating a dataset
 
     note : metadata field does not accept strings containing '__' as dict key
+
+    note : If no description markdown file is given, a template `generated_description.md` is created in the data_opener folder.
     """
 
     name: str
     data_opener: pathlib.Path  # Path to the data opener
-    description: pathlib.Path  # Path to the description file
+    description: Optional[pathlib.Path] = None  # Path to the description file
     permissions: Permissions
     metadata: Optional[Dict[str, str]] = None
     logs_permission: Permissions
 
     type_: typing.ClassVar[Type] = Type.Dataset
 
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def _check_description(cls, values):
+        """Generate a template description file next to the opener when none is given."""
+        # Also covers an explicit `description=None`; a missing `data_opener` is left
+        # untouched so that pydantic reports it as a regular validation error.
+        if values.get("description") is None and values.get("data_opener") is not None:
+            parent_path = pathlib.Path(values["data_opener"]).parent
+            description_path = parent_path / "generated_description.md"
+            description_path.write_text(GENERATED_DESCRIPTION_CONTENT, encoding="utf-8")
+            values["description"] = description_path
+        return values
+
     class Meta:
         file_attributes = (
             "data_opener",
diff --git a/tests/sdk/test_schemas.py b/tests/sdk/test_schemas.py
index 9c50c757..28b1164f 100644
--- a/tests/sdk/test_schemas.py
+++ b/tests/sdk/test_schemas.py
@@ -4,6 +4,8 @@
 import pytest
 
 from substra.sdk.schemas import DataSampleSpec
+from substra.sdk.schemas import DatasetSpec
+from substra.sdk.schemas import Permissions
 
 
 @pytest.mark.parametrize("path", [pathlib.Path() / "data", "./data", pathlib.Path().cwd() / "data"])
@@ -38,3 +40,18 @@ def test_datasample_spec_paths_set_to_none():
 def test_datasample_spec_path_set_to_none():
     with pytest.raises(ValueError):
         DataSampleSpec(path=None, data_manager_keys=[str(uuid.uuid4())])
+
+
+def test_dataset_spec_no_description(tmpdir):
+
+    opener_path = tmpdir / "fake_opener.py"
+    permissions = Permissions(public=True, authorized_ids=[])
+
+    DatasetSpec(
+        name="Fake Dataset",
+        data_opener=str(opener_path),
+        permissions=permissions,
+        logs_permission=permissions,
+    )
+
+    assert (pathlib.Path(opener_path).parent / "generated_description.md").exists()